Orchestration
CrewAI and Adapt.AI
LangGraph
Unique Features Comparison:
• LangGraph: Offers customizable multi-agent architectures and explicit control over agent communication, providing flexibility in system design.
• Amazon Bedrock: Provides managed orchestration with supervisor agents (limited customization), optimized collaboration modes, and integrated debugging tools, streamlining the development and management of multi-agent systems. Well suited when a unified end-user experience is the priority.
In summary, LangGraph excels in offering flexible, customizable architectures with explicit communication control, suitable for developers seeking tailored solutions. In contrast, Amazon Bedrock focuses on managed orchestration with built-in tools for efficient development and management of multi-agent systems.
Network: each agent can communicate with every other agent. Any agent can decide which other agent to call next.
Supervisor: each agent communicates with a single supervisor agent. Supervisor agent makes decisions on which agent should be called next.
Hierarchical: you can define a multi-agent system with a supervisor of supervisors. This is a generalization of the supervisor architecture and allows for more complex control flows.
Custom multi-agent workflow: each agent communicates with only a subset of agents. Parts of the flow are deterministic, and only some agents can decide which other agents to call next.
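A minimal, framework-agnostic sketch of the supervisor pattern described above, written in plain Python. The agent functions, routing labels, and shared-history convention are hypothetical placeholders, not LangGraph or Bedrock APIs.

```python
# Minimal supervisor loop: a central agent decides which worker agent runs next.
# All agent functions and routing labels here are hypothetical placeholders.
from typing import Callable, Dict, List

def research_agent(task: str) -> str:
    return f"research notes for: {task}"

def writer_agent(task: str) -> str:
    return f"draft based on: {task}"

WORKERS: Dict[str, Callable[[str], str]] = {
    "research": research_agent,
    "write": writer_agent,
}

def supervisor(task: str, history: List[str]) -> str:
    """Decide which worker should act next, or finish."""
    if not any(h.startswith("research notes") for h in history):
        return "research"
    if not any(h.startswith("draft") for h in history):
        return "write"
    return "done"

def run(task: str) -> List[str]:
    history: List[str] = []
    while True:
        route = supervisor(task, history)      # the supervisor picks the next agent
        if route == "done":
            return history
        history.append(WORKERS[route](task))   # the chosen worker acts; its output is shared

print(run("summarize multi-agent orchestration options"))
```

A network architecture would replace the single `supervisor` with a routing decision inside each worker; a hierarchical setup nests one such loop inside another.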
———
Evaluation
```mermaid
graph TD
    A[Start] --> B{Is centralized control needed?}
    B -->|Yes| C[Supervisor Architecture<br><i>Use Case:</i> Customer service platform with a central agent routing inquiries to specialized agents.]
    B -->|No| D{Do agents have equal roles and direct communication?}
    D -->|Yes| E[Network Architecture<br><i>Use Case:</i> Decentralized sensor network with peer-to-peer data sharing.]
    D -->|No| F{Is there a clear task hierarchy?}
    F -->|Yes| G[Hierarchical Architecture<br><i>Use Case:</i> Manufacturing system with layered task delegation.]
    F -->|No| H[Custom Architecture<br><i>Use Case:</i> Financial trading platform with specialized, non-linear agent interactions.]
```
Big selling points for Bedrock:
• Lots of LLMs to choose from
• Managed
• Traceable
Big negatives:
• Can't work with LLMs outside Bedrock/SageMaker
• Only supports supervisor/sub-agent patterns; can't build network or role-play style agents
Evaluating multi-agent orchestration involves several critical factors to ensure effective collaboration and task execution:
• Coordination Mechanisms: Assess how agents communicate and synchronize their actions to achieve shared objectives.
• Task Allocation Strategies: Determine the methods for assigning tasks to agents based on their capabilities and current workloads.
• Scalability: Evaluate the system’s ability to maintain performance as the number of agents or tasks increases.
• Fault Tolerance: Consider the system’s resilience to agent failures and its capacity to continue operations seamlessly.
• Performance Metrics: Measure throughput, latency, and resource utilization to ensure efficiency.
• Adaptability: Examine the system’s flexibility in responding to dynamic environments and evolving requirements.
• Security and Trust: Ensure secure communication channels and establish trust among agents to prevent malicious activities.
• Interoperability: Verify that agents can effectively interact within heterogeneous environments, including various platforms and protocols.
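As a rough illustration of the performance-metrics bullet above, the sketch below computes throughput and latency percentiles from a hypothetical list of agent-run records; the record format and the numbers are assumptions, not output from any framework.

```python
# Compute simple throughput and latency figures from hypothetical run records.
from statistics import quantiles

runs = [  # (start_time_s, end_time_s) for completed agent tasks -- made-up data
    (0.0, 0.8), (0.1, 1.4), (0.5, 1.1), (0.9, 2.3), (1.0, 1.9),
]

latencies = [end - start for start, end in runs]
window = max(end for _, end in runs) - min(start for start, _ in runs)

print(f"throughput: {len(runs) / window:.2f} tasks/sec")
print(f"p50 latency: {quantiles(latencies, n=100)[49]:.2f} s")
print(f"p95 latency: {quantiles(latencies, n=100)[94]:.2f} s")
```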
The document discusses multi-agent systems, which are frameworks that utilize multiple independent agents—each potentially powered by a large language model (LLM)—to enhance application performance and manage complexity. Here’s a summary of the key concepts:
https://langchain-ai.github.io/langgraphjs/concepts/multi_agent/
Yes, Amazon Bedrock now supports multi-agent collaboration through a feature called "Bedrock Agents." This allows developers to build and deploy applications that leverage the power of multiple AI agents working together.
In a multi-agent system, coordination and orchestration are vital to ensure that the agents work together smoothly and effectively. Think of it like a conductor leading an orchestra. The conductor doesn't play an instrument but guides the musicians, ensuring they play in harmony and achieve the desired musical outcome.
In Bedrock Agents, this role is often played by a supervisor agent. This agent acts as the central coordinator, responsible for breaking the user's request into sub-tasks, routing each sub-task to the appropriate specialized sub-agent, and consolidating the results into a single response.
https://github.com/awslabs/amazon-bedrock-agent-samples
https://awslabs.github.io/multi-agent-orchestrator/general/how-it-works/
https://docs.aws.amazon.com/bedrock/latest/userguide/agents-how.html
https://awslabs.github.io/multi-agent-orchestrator/cookbook/examples/chat-demo-app/
————
airSlate offers a comprehensive suite of tools designed to automate and streamline document workflows, including e-signatures, document generation, contract management, and web forms. Integrating multi-agent AI workflows can further enhance these capabilities by enabling autonomous agents to collaborate on complex tasks, thereby increasing efficiency and reducing manual intervention.
1. Automated Contract Lifecycle Management
Scenario: Managing contracts involves multiple stages such as drafting, reviewing, negotiating, and signing, often requiring coordination among various departments.
Multi-Agent Workflow:
• Drafting Agent: Utilizes predefined templates to generate initial contract drafts based on input parameters.
• Review Agent: Analyzes the draft for compliance with company policies and legal standards.
• Negotiation Agent: Engages with the counterparty’s AI agents to negotiate terms within predefined boundaries.
• Approval Agent: Routes the finalized contract to the appropriate stakeholders for approval.
• E-Signature Agent: Facilitates the collection of electronic signatures from all parties.
Benefits: This multi-agent system accelerates the contract lifecycle, ensures compliance, and minimizes human errors.
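As a sketch of how such a sequential hand-off could be wired, the stubs below chain the contract-lifecycle agents above in plain Python; the functions and state keys are illustrative placeholders, not airSlate or Bedrock APIs.

```python
# Illustrative stubs for the contract lifecycle above: each "agent" is a plain
# function that receives the running contract state and returns it updated.
def drafting_agent(state: dict) -> dict:
    state["draft"] = f"Contract draft for {state['counterparty']}"
    return state

def review_agent(state: dict) -> dict:
    state["compliant"] = "draft" in state            # stand-in for a policy/legal check
    return state

def negotiation_agent(state: dict) -> dict:
    state["terms"] = "net-30 payment"                # stand-in for negotiated terms
    return state

def approval_agent(state: dict) -> dict:
    state["approved"] = state["compliant"]
    return state

def esignature_agent(state: dict) -> dict:
    state["signed"] = state["approved"]
    return state

PIPELINE = [drafting_agent, review_agent, negotiation_agent, approval_agent, esignature_agent]

def run_contract_workflow(counterparty: str) -> dict:
    state = {"counterparty": counterparty}
    for agent in PIPELINE:                           # deterministic hand-off between agents
        state = agent(state)
    return state

print(run_contract_workflow("Acme Corp"))
```

The document-generation, onboarding, and support-ticket workflows below follow the same pattern with different agents in the pipeline.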
2. Intelligent Document Generation and Routing
Scenario: Creating personalized documents like proposals or invoices requires data from various sources and must be sent to the correct recipients in a specific order.
Multi-Agent Workflow:
• Data Aggregation Agent: Gathers necessary information from CRMs, databases, and spreadsheets.
• Document Generation Agent: Populates templates with the aggregated data to create customized documents.
• Validation Agent: Checks the documents for accuracy and completeness.
• Routing Agent: Determines the appropriate recipients and sends the documents in the correct sequence.
• Notification Agent: Sends alerts and reminders to stakeholders about pending actions.
Benefits: Automating this process reduces turnaround times and ensures that documents are accurate and delivered promptly.
3. Automated Employee Onboarding
Scenario: Onboarding new employees involves collecting personal information, setting up accounts, and ensuring compliance with company policies.
Multi-Agent Workflow:
• Information Collection Agent: Sends web forms to new hires to gather necessary personal and professional details.
• Account Setup Agent: Creates user accounts and grants access to required systems and tools.
• Training Agent: Assigns mandatory training modules and tracks completion.
• Compliance Agent: Ensures all legal and company-specific documents are signed and stored appropriately.
• Feedback Agent: Collects feedback from new employees to improve the onboarding process.
Benefits: This workflow ensures a smooth onboarding experience, maintains compliance, and allows HR to focus on strategic tasks.
4. Customer Support Ticket Processing
Scenario: Efficiently handling customer support tickets requires categorizing issues, assigning them to the right agents, and ensuring timely resolution.
Multi-Agent Workflow:
• Ticket Classification Agent: Analyzes incoming tickets and categorizes them based on issue type and priority.
• Assignment Agent: Routes tickets to the appropriate support personnel or departments.
• Resolution Agent: Provides automated responses for common issues or gathers necessary information for complex problems.
• Escalation Agent: Monitors ticket status and escalates unresolved issues according to predefined SLAs.
• Feedback Agent: Sends satisfaction surveys to customers after ticket resolution.
Benefits: This system enhances response times, ensures issues are addressed by the right personnel, and improves customer satisfaction.
By implementing multi-agent AI workflows, airSlate can elevate its automation capabilities, providing more efficient, accurate, and responsive solutions to complex business processes.
1. “A bustling office setting where employees enjoy a diverse spread of catered meals, highlighting the convenience and variety offered by [Catering Platform].”
2. “A seamless sequence showing a user effortlessly placing a catering order through [Catering Platform]’s user-friendly interface, followed by timely delivery and a satisfied team enjoying the meal.”
3. “A vibrant montage of different cuisines and dishes available through [Catering Platform], emphasizing the platform’s extensive menu options for various dietary needs.”
4. “A time-lapse of a corporate event being set up, with [Catering Platform]’s catering services providing elegant and appetizing food arrangements, impressing attendees.”
5. “A split-screen comparison of traditional catering ordering methods versus the streamlined, efficient process offered by [Catering Platform], showcasing time and effort saved.”
6. “A day-in-the-life of a busy office manager who relies on [Catering Platform] to organize meals for meetings, highlighting the platform’s reliability and ease of use.”
7. “A testimonial compilation from satisfied clients praising [Catering Platform]’s exceptional service, punctual deliveries, and quality of food.”
8. “An animated journey of a catering order from placement to delivery, illustrating [Catering Platform]’s seamless logistics and customer support.”
9. “A showcase of [Catering Platform]’s mobile app features, demonstrating how users can track orders, earn rewards, and manage catering needs on-the-go.”
10. “A festive office celebration catered by [Catering Platform], highlighting how the service enhances corporate events and boosts employee morale.”
Key Points Summary from the Transcript:
Opening Remarks
• CEO’s First Event: Emphasized the importance of AWS re:Invent as a learning platform for builders and developers.
• Scale of re:Invent 2024: Attendance of 60,000 in-person participants, 400,000 online viewers, with 1,900 sessions and 3,500 speakers.
AWS Community and Startups
• Global Community Growth: 600 user groups in 120 countries, contributing to product innovation.
• Startups: AWS’s foundational customer base; startups play a pivotal role in technology adoption and disruption.
• Commitment to Startups: AWS will offer $1 billion in credits for startups globally by 2025.
Generative AI and Innovation
• Impact of Generative AI: A disruptive force across industries, with startups leading the way.
• Compute Advancements: New EC2 offerings like P6 instances featuring NVIDIA Blackwell GPUs.
• Custom Silicon: AWS’s Graviton chips deliver significant cost and energy efficiencies; Trainium chips offer specialized performance for generative AI.
• AI Model Support: Expansion of Bedrock’s capabilities to manage and optimize AI models, including automated reasoning and multi-agent collaboration.
Security
• Top Priority: Security is built into every layer of AWS services, from data centers to software development practices.
• Automated Reasoning Checks: A unique feature to validate factual correctness in generative AI applications.
Core Building Blocks: Compute and Storage
• EC2 Innovations: Over 850 instance types tailored for diverse workloads.
• S3 Developments: New features like S3 Table Buckets and S3 Metadata for enhanced analytics and cost efficiency.
Databases
• Relational Databases: Aurora’s advancements, including multi-region consistency with Aurora DSQL.
• NoSQL Expansion: DynamoDB now supports multi-region strong consistency and low-latency access.
Customer Success Stories
• JPMorgan Chase: Modernized with AWS tools like Graviton, SageMaker, and Bedrock for resilience and scalability.
• Genentech: Using Bedrock to accelerate drug discovery, automating years of manual effort into minutes.
Bedrock Features
• Comprehensive AI Support: Tools for model distillation, retrieval-augmented generation (RAG), and multi-agent collaboration.
• Guardrails for Safety: Responsible AI implementation with precision and transparency.
• Future-Ready: Announced Trainium 3 with twice the compute of its predecessor.
AWS Vision
• Customer-Centric Innovation: AWS’s “working backward” approach focuses on delivering exceptional products informed by customer needs.
• Sustainable and Scalable Growth: Continued investment in security, performance, and next-gen computing capabilities.
Conclusion
• AWS is at the forefront of enabling global enterprises, startups, and developers to innovate through foundational building blocks and cutting-edge technologies like generative AI and custom silicon.
Key Points Summary:
Amazon’s AI and Generative AI Strategy:
1. AI for Customer-Centric Solutions:
• AI is used across Amazon to address customer problems, such as personalized recommendations, fulfillment center optimization, drone delivery, and Just Walk Out technology.
• Generative AI is applied in practical, impactful ways.
2. AI Impact Areas:
• Cost avoidance and productivity improvements.
• Reimagined customer experiences (e.g., improved customer service chatbot with Generative AI).
3. Examples of AI Implementations:
• Customer Service Chatbot: Predictive, efficient, with 500-basis-point customer satisfaction improvement.
• Seller Tools: Simplified onboarding using Generative AI, benefiting 500,000+ sellers.
• Inventory Management: Improved forecasting accuracy with Transformer models (10-20% improvements).
• Robotics (e.g., Sparrow): Advanced sorting for fulfillment centers, achieving 25% faster processing and lower costs.
• Shopping Experience (e.g., Rufus, Alexa): AI-driven shopping agents and enhanced personalization.
• Prime Video Features: AI-driven next-gen stats for NFL games, enhancing fan engagement.
AWS AI and Frontier Model Innovations:
4. Amazon Nova Models:
• Four models for text and multimodal tasks (text, images, video).
• Outperform competitors (e.g., OpenAI, Google Gemini) with cost-efficiency (75% cheaper) and low latency.
• New capabilities for image (Canvas) and video (Reel) generation.
5. Future AI Developments:
• Speech-to-speech and multimodal “any-to-any” models launching in 2025.
• Integration of Generative AI tools across applications with an emphasis on customization.
Developer and Operations Enhancements:
6. Q Developer:
• Expanded from a coding assistant to cover testing, documentation, and code reviews.
• Integrated with platforms like GitLab.
• Introduced transformation tools for Windows and VMware modernization.
7. PagerDuty Integration:
• Combined AWS and PagerDuty for seamless incident management and diagnostics using Generative AI.
8. Modernization Tools:
• New Q agents to help modernize Windows, VMware, and mainframe applications.
Enterprise Solutions and Analytics:
9. Q Business for Internal Data:
• Unified access to enterprise data from tools like Salesforce, SharePoint, and custom databases.
• APIs for ISVs to integrate and enhance application value using Q-indexed data.
10. Enhanced SageMaker:
• New unified studio integrates AI, analytics, and data management capabilities.
• Supports seamless zero-ETL access to third-party applications and data lakes.
Announcements of Note:
11. Zero-ETL Innovations:
• Simplified data access across diverse sources.
• SageMaker Lakehouse for unified data querying and analysis.
12. SageMaker Unified Studio:
• Consolidates tools for data scientists and analysts to collaborate on AI and analytics projects.
13. Q Business Automate:
• Automates complex workflows across multiple applications with advanced agents.
Call to Action:
• AWS aims to empower customers with cutting-edge tools, fostering innovation and productivity.
• Encourages developers, businesses, and partners to explore and leverage AWS capabilities during re:Invent 2024.
Tuesday, December 3, 2024
6:45 AM
Key Points from the AWS re:Invent 2024 Monday Night Live Presentation:
Introduction by Peter DeSantis
• Welcomed attendees to re:Invent 2024 and outlined the evening’s focus on deep technical innovations.
• Discussed AWS’s unique approach to designing services, emphasizing foundational mechanisms for security, performance, and reliability.
• Shared an analogy using trees to describe AWS’s technical investments:
• Roots: Foundational mechanisms (e.g., leadership in details and weekly AWS-wide operations meetings).
• Trunk: Core innovations like Nitro, custom silicon, and databases.
• Canopy: The results visible to customers (e.g., high performance and security).
Innovations in Compute and Networking (Dave Brown)
Graviton Processors:
• Evolution from Graviton1 to Graviton4, focusing on real-world workload performance.
• Graviton4 boasts multi-socket support, increased vCPU count, and better performance for intensive workloads like machine learning and analytics.
AWS Nitro System:
• Revolutionized server architecture for enhanced security and scalability.
• Enabled cryptographic attestation for server integrity.
Storage Innovations:
• Transitioned from monolithic architectures to disaggregated storage for flexibility and resilience.
• Introduced Nitro cards for storage systems, reducing operational complexity and improving recovery speeds.
AI Infrastructure Innovations (Peter DeSantis)
AI Training Challenges:
• Discussed the “scale-up” need for more powerful servers and the “scale-out” requirement for vast interconnected clusters.
• Trained AI models are becoming exponentially larger due to scaling laws.
Trainium2:
• AWS’s next-gen AI chip designed for both training and inference workloads.
• Features include high-performance systolic array architecture and interconnect technology (Neuron Link) for building “Ultra Servers.”
• Trainium2-powered infrastructure allows faster, cost-effective training of trillion-parameter models.
Collaboration with Anthropic
• Announced “Project Rainier,” a new cluster with hundreds of thousands of Trainium2 chips.
• Partnered with Anthropic to optimize AI model training and inference, including a latency-optimized version of their Claude 3.5 model.
Networking and Scale-Out Innovations
10p10u Network:
• Custom-built AI-optimized network fabric with massive capacity and low latency.
• Features innovations like trunk connectors and Firefly optic plugs for efficient, reliable deployment.
Scalable Intent-Driven Routing (SIDR):
• AWS’s proprietary protocol combines centralized optimization with decentralized speed, responding to failures within a second.
Conclusion
• AWS is innovating across the stack—from silicon to networking—to deliver unparalleled performance, security, and cost efficiency.
• Customers benefit from these advancements in their workloads, whether AI, storage, or compute.
Closing
• The event ended with an invitation to explore re:Invent 2024, celebrating the depth and breadth of AWS innovations.
-----------
1. Graviton
• What is it? A general-purpose processor.
• Purpose: Designed for running everyday cloud workloads like web servers, databases, and containers efficiently.
• Key Feature: Built to provide high performance and cost savings for common cloud applications.
• Unique Focus: Optimized for scale-out workloads like handling lots of simultaneous smaller tasks.
2. Trainium
• What is it? A specialized AI processor for training machine learning models.
• Purpose: Designed to handle the heavy lifting when teaching AI systems how to think and make predictions.
• Key Feature: Optimized for training large AI models quickly and cost-effectively.
• Unique Focus: Scale-up workloads for building cutting-edge AI models with trillions of parameters.
3. Inferentia
• What is it? A specialized AI processor for inference tasks.
• Purpose: Focuses on running already-trained AI models to make predictions or generate responses (e.g., chatbot answers or image recognition).
• Key Feature: Optimized for delivering low-cost, high-speed results when AI models are in use.
• Unique Focus: AI inference workloads, such as serving predictions or generating outputs in real-time
4. Nitro
• What is it? A hardware and software system that powers the underlying infrastructure of AWS servers.
• Purpose: Provides the foundation for security, performance, and scalability in AWS instances (e.g., EC2).
• Key Feature: Offloads tasks like virtualization and security from the main processor, enabling better performance.
• Unique Focus: Infrastructure optimization and enhanced server performance for all AWS workloads.
----------
When choosing between AI training, AI inference, and general-purpose computing, the decision depends on the unique requirements of each workload. Here’s a comparison of their unique requirements and why you’d choose one over the other:
1. AI Training
• Unique Requirement:
• Massive computational power to process large datasets and optimize models with trillions of parameters.
• High-speed memory and parallelism for handling complex matrix calculations efficiently.
• Scalability to train models across thousands of GPUs or specialized processors.
• Why Choose This?
• You’re building or refining AI models, such as training a chatbot, image recognition system, or language model.
• Needs specialized processors like Trainium to handle the intense and repetitive math calculations in deep learning.
2. AI Inference
• Unique Requirement:
• Low-latency performance to provide quick predictions or outputs.
• Efficient use of compute and memory to handle model predictions without needing as much parallelism as training.
• Ability to scale for real-time usage (e.g., serving millions of API requests per second).
• Why Choose This?
• You’re running trained AI models to serve predictions, such as generating responses in a chatbot, identifying objects in images, or detecting fraud in transactions.
• Needs specialized processors like Inferentia to optimize for speed and cost when running AI models in production.
3. General-Purpose Computing
• Unique Requirement:
• Flexibility to handle a wide range of workloads (e.g., databases, web servers, analytics).
• Balanced performance and cost without needing the extreme specialization of AI-focused tasks.
• Compatibility with diverse software stacks and programming environments.
• Why Choose This?
• You’re managing common IT workloads like hosting websites, running containerized applications, or processing standard business applications.
• Needs general-purpose processors like Graviton, which are cost-effective and energy-efficient for non-AI workloads.
Key Differences in Selection
| Aspect | AI Training | AI Inference | General-Purpose Computing |
| --- | --- | --- | --- |
| Compute Power | Extremely high for parallel processing | Moderate for real-time processing | Balanced for diverse workloads |
| Memory Requirements | High-speed memory for large datasets | Efficient memory for model predictions | Standard memory for broad use cases |
| Latency Sensitivity | Low sensitivity; tolerates longer runtimes | Very sensitive; real-time responses needed | Moderate sensitivity |
| Scale | Scale-out for massive parallel clusters | Scales horizontally for serving predictions | Scales moderately with standard workloads |
| Cost Sensitivity | Secondary to performance | High priority for cost-efficiency | Balanced between cost and performance |
Yes, AWS offers several other processors besides Graviton, Trainium, and Inferentia. Each is optimized for specific use cases and workloads. Here's an overview of these additional processors and what they are good for:
1. Intel Xeon Processors (x86-based)
What are they good for?
Enterprise applications: Widely used for legacy workloads, traditional enterprise software, and databases.
Compatibility: Ideal for applications that require x86 architecture and cannot yet be migrated to Arm-based architectures like Graviton.
Performance needs: Suitable for workloads requiring high single-thread performance, such as financial modeling or simulations.
2. AMD EPYC Processors (x86-based)
What are they good for?
Cost-efficient workloads: Offers a balance of performance and lower cost compared to Intel processors.
High-core workloads: Suitable for applications like big data analytics, memory-intensive workloads, and distributed applications.
Alternatives to Intel: For customers who want x86 compatibility but at a lower price point.
3. NVIDIA GPUs
What are they good for?
AI and ML workloads: Particularly useful for AI training and inference tasks, especially with deep learning frameworks like TensorFlow or PyTorch.
Graphics rendering: Ideal for video rendering, game development, and other GPU-intensive graphics workloads.
High-performance computing (HPC): Accelerates workloads like scientific simulations, genomics, or financial risk modeling.
Examples: AWS offers GPU-powered instances with NVIDIA A100, V100, and T4 GPUs.
4. Habana Gaudi Processors
What are they good for?
AI training: Optimized for deep learning training tasks, providing an alternative to NVIDIA GPUs and Trainium.
Cost-performance balance: Designed for customers seeking cost-efficient AI training without compromising on scalability.
Specialty AI workloads: Works well with large-scale neural networks and distributed training scenarios.
5. AWS Nitro System (Security Co-Processor)
What is it?
Not a traditional CPU but a specialized system managing virtualization, security, and storage offloading.
What is it good for?
Improved performance: By offloading system management tasks from the main processors, it ensures optimized performance for compute instances.
Enhanced security: Provides hardware-level security for EC2 instances, including encryption and secure boot processes.
6. FPGAs (Field-Programmable Gate Arrays)
What are they good for?
Custom workloads: Useful for applications requiring custom hardware acceleration, such as genomics, video processing, or high-frequency trading.
Flexibility: Allows customers to design and deploy their own custom hardware logic.
Examples: Available in AWS EC2 F1 instances.
7. AWS Inferentia 2 (Upcoming or Enhanced Inferentia)
What is it?
The next generation of Inferentia, optimized further for AI inference workloads.
What is it good for?
High-throughput inference: Designed to handle the largest AI models with reduced latency and cost.
Enhanced efficiency: Improvements over the original Inferentia for handling diverse inference workloads.
Comparison of Additional Processors
| Processor | Architecture | Best For | Examples of Use Cases |
| --- | --- | --- | --- |
| Intel Xeon | x86 | Legacy and enterprise apps | Databases, ERP systems |
| AMD EPYC | x86 | Cost-effective general-purpose workloads | Big data, web hosting |
| NVIDIA GPUs | GPU | AI/ML training, rendering, HPC | Deep learning, video rendering, scientific HPC |
| Habana Gaudi | Custom AI | Cost-effective AI training | Deep learning frameworks |
| Nitro System | Custom ASIC | Security and virtualization | EC2 infrastructure, data encryption |
| FPGAs | Customizable HW | Specialized hardware acceleration | Genomics, video processing, trading |
How to Choose?
Legacy compatibility: Use Intel or AMD processors.
General-purpose workloads: Prefer Graviton for cost-efficiency.
AI Training: Trainium or Habana Gaudi.
AI Inference: Inferentia or NVIDIA GPUs.
Graphics or HPC: NVIDIA GPUs.
Custom acceleration: FPGAs for custom hardware needs.
Processor-Specific Programming Language/Framework Support
| Processor | Supported Languages/Frameworks | Use Cases | Incompatibilities |
| --- | --- | --- | --- |
| Graviton | Java, Python, Go, Node.js, Rust, C, C++ (with recompilation), PHP, Ruby | General-purpose workloads | Legacy applications tied to x86; some older binary-only software |
| Trainium | Python (TensorFlow, PyTorch, MXNet) | AI/ML training | Non-ML applications; requires frameworks optimized for Trainium |
| Inferentia | Python (TensorFlow, PyTorch, ONNX Runtime) | AI inference | Training or general-purpose workloads |
| Intel/AMD | All major languages, including C, C++, Java, Python, .NET, PHP | Enterprise applications, HPC | No specific limitations, but higher cost for workloads better suited to Graviton or AI-specific processors |
| NVIDIA GPUs | Python (TensorFlow, PyTorch, CUDA), C++ (CUDA, OpenCL) | AI, graphics, HPC | Workloads not benefiting from parallel processing (e.g., small-scale databases) |
| Habana Gaudi | Python (TensorFlow, PyTorch) | Cost-effective AI training | Inference or general-purpose applications |
| FPGAs | VHDL, Verilog (for hardware design); APIs in Python or C | Custom hardware acceleration | Requires hardware programming expertise; unsuitable for general-purpose or high-level AI tasks |
Quick Decision Guide
General-purpose apps: Use Graviton unless legacy software demands x86 (Intel/AMD).
AI Training: Choose Trainium (or Habana Gaudi for cost-sensitive training).
AI Inference: Use Inferentia or NVIDIA GPUs for low latency and high throughput.
Graphics/HPC: NVIDIA GPUs for rendering or complex parallel tasks.
Custom Acceleration: Use FPGAs for highly specialized workloads.
Act like document reviewer. Summarize the main points of the current page in simple language . Highlight ( Font bold and red) who , why, when , statistics and how part of summarization.At the end explain the key concept in simple language
https://arxiv.org/abs/2107.04767
https://ieeexplore.ieee.org/document/10612615
https://community.aws/content/2Z4yo9liXdKl2eoUYexiLvzFCZY
https://aws.amazon.com/blogs/architecture/choosing-your-vpc-endpoint-strategy-for-amazon-s3/
https://docs.aws.amazon.com/en_cn/vpc/latest/userguide/endpoint-services-overview.html
--------
Here are the key soft and hard service quota limits for Amazon SNS and SQS:
Amazon SNS
Hard Limits
Maximum message size: 256 KB
Maximum header size: 16 KB
Maximum number of messages in PublishBatchRequest: 10
Email delivery rate: 10 messages per second (cannot be increased)
Soft Limits (can be increased)
Topics per account: Varies by region
Subscriptions per account: Varies by region
Pending subscriptions: 5,000 per account
SMS delivery rate:
Promotional: 20 messages per second
Transactional: 20 messages per second
Messages published per second: Varies by region
Amazon SQS
Hard Limits
Queue name length: 80 characters
Queue policy size: 8,192 bytes
Delay queue: Maximum 15 minutes
Long polling wait time: Maximum 20 seconds
Number of queues listed per ListQueues request: 1,000
Soft Limits (can be increased)
In-flight messages:
Standard queues: Approximately 120,000
FIFO queues: 20,000
Message throughput for FIFO queues:
Unbatched: 300 transactions per second
High throughput: Varies by region, up to 70,000 transactions per second in some regions
It's important to note that some of these limits, especially the soft limits, can vary by AWS region and account status. AWS recommends using Service Quotas to view your account-specific quotas and request increases when needed. You can also set up CloudWatch alarms to notify you when you're approaching quota limits.
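A small sketch of checking those account-applied quotas programmatically, assuming boto3 is installed and the credentials allow servicequotas:ListServiceQuotas; only the first page of results is shown here.

```python
# List the SQS quotas applied to this account via the Service Quotas API.
import boto3

client = boto3.client("service-quotas", region_name="us-east-1")

response = client.list_service_quotas(ServiceCode="sqs")
for quota in response["Quotas"]:
    print(f'{quota["QuotaName"]}: {quota["Value"]} (adjustable: {quota["Adjustable"]})')
```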
Citations:
[1] https://docs.aws.amazon.com/general/latest/gr/sns.html
[2] https://docs.aws.amazon.com/general/latest/gr/sqs-service.html
[3] https://aws.amazon.com/about-aws/whats-new/2022/11/amazon-sns-enhanced-integration-aws-service-quotas/
[4] https://docs.aws.amazon.com/servicequotas/latest/userguide/configure-cloudwatch.html
[5] https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/quotas-fifo.html
[6] https://www.bluematador.com/docs/troubleshooting/aws-sqs-limits
[7] https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/quotas-queues.html
[8] https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-quotas.html
---------
A proof of delivery (POD) is a document verifying that a carrier has fulfilled its contractual obligations for a shipment by confirming receipt with the recipient. It's a receipt showing that the goods were delivered to the correct person at the right location.
https://www.stampli.com/blog/invoice-management/vendor-invoice-management-workflow/
Proof of Delivery (POD) documents are essential in verifying that goods or services have been delivered as agreed upon. However, several potential issues can arise with PODs that may complicate or delay processing, especially when manual adjustments or errors occur. Here are some common challenges:
1. Manual Adjustments and Handwritten Notes:
• Illegibility: Handwritten changes, such as quantity adjustments, customer notes, or signatures, may be difficult to read, leading to misinterpretation.
• Ambiguity: Manual notes often lack clarity or standardized formatting, which makes automated processing difficult and introduces the risk of errors.
• Unstructured Data: Manual adjustments can include corrections to delivery dates, conditions, or other variables that aren’t easily captured by standard OCR technology.
2. Missing or Incomplete Information:
• Signatures: Lack of a signature or incomplete signature details may cause delays in confirming the delivery.
• Unrecorded Delivery Time: Missing timestamps or incomplete date/time data can lead to disputes over when the goods were delivered.
• Item Discrepancies: Sometimes, the delivered items don’t match the original order in terms of quantity, type, or condition. If this is not noted clearly on the POD, it can result in disputes during settlement.
3. Damaged or Poor-Quality Documents:
• Physical Damage: PODs that are torn, smudged, or have faded ink are hard to process, especially when digitizing them.
• Blurry Scans or Photos: Poor quality scans or photographs can lead to incomplete or inaccurate data capture during the digital processing phase.
4. Delayed or Missing POD Submission:
• Lost PODs: Physical PODs can easily be lost or misplaced, delaying settlements and potentially leading to disputes.
• Delayed Submission: If drivers or delivery personnel don’t submit PODs promptly, it delays the invoicing and settlement process.
• Inconsistent Submission Formats: Different submission formats (paper, digital, photo, etc.) can create inconsistencies in how PODs are processed.
5. Fraud or Tampering:
• Altered Information: Manual tampering with the POD (e.g., changing delivery dates or quantities) can lead to fraud and incorrect settlements.
• Forged Signatures: A customer or recipient’s signature might be forged, leading to disputes over whether the delivery was actually accepted.
6. Data Entry Errors:
• Manual Data Entry Mistakes: When POD information is manually inputted into a system, there’s a high risk of human error, especially with complex or poorly structured documents.
• Inconsistent Data Fields: Some PODs may lack standardization, making it difficult to consistently extract key information such as item codes, delivery addresses, or special instructions.
7. Non-Compliance with Contractual Terms:
• Delivery Conditions Not Met: PODs might not capture deviations from agreed terms, such as delivery outside the agreed time window or goods being delivered in unacceptable condition, which can lead to delays in settlement or disputes with customers.
• Inaccurate Documentation of Issues: If issues like damages or shortages are not documented clearly on the POD, it could result in a delayed or incorrect claims process.
8. Multiple Stakeholders Involved:
• Conflicting Versions: Multiple parties (e.g., carrier, shipper, and customer) may have conflicting versions of a POD, leading to disputes over the exact details of the delivery.
• Lack of Communication: Inefficient communication between stakeholders may cause delays in resolving discrepancies or filing claims based on POD data.
9. Environmental Factors:
• Harsh Conditions: PODs, especially paper forms, can be damaged by environmental factors (rain, dirt, wear and tear from travel), making them unreadable or illegible.
• Geographical Challenges: In remote locations, connectivity issues may delay the submission of digital PODs or lead to incomplete document uploads.
10. Discrepancies in Expected vs. Actual Delivery:
• Mismatch in Delivery Items: Delivered goods may not match the order in terms of quality or quantity, and if this isn’t captured on the POD, it could lead to disputes during settlement.
• Partial Deliveries: Sometimes, only part of an order is delivered, and if this is not clearly noted on the POD, it could result in billing inaccuracies.
Conclusion:
-------------------------
https://dashboard.scale.com/nucleus/
https://www.labellerr.com/blog/video-annotation-services/
To create an AWS Lambda function that accepts JSON data, validates required fields, stores the data in an Amazon RDS database, and returns a success or failure message, follow the steps below:
### Step 1: Set Up the RDS Database
First, ensure that your RDS database is properly set up and that you have the necessary connection details (endpoint, username, password, database name, etc.).
The database table should be created with the following SQL:
```sql
CREATE TABLE Invoices (
storeId VARCHAR(255) NOT NULL,
invoiceDate DATE NOT NULL,
TotalAmount DECIMAL(10, 2) NOT NULL,
data JSON NOT NULL,
PRIMARY KEY (storeId, invoiceDate)
);
```
### Step 2: Lambda Function Code
Here’s the complete code for the AWS Lambda function:
```python
import json
import pymysql
import os

# Database connection settings
RDS_HOST = os.getenv('RDS_HOST')
RDS_USER = os.getenv('RDS_USER')
RDS_PASSWORD = os.getenv('RDS_PASSWORD')
RDS_DB_NAME = os.getenv('RDS_DB_NAME')


def lambda_handler(event, context):
    # Expect the JSON data in the 'body' key from the calling function
    if 'body' not in event:
        return {
            'statusCode': 400,
            'body': json.dumps({'message': 'Request body is missing'})
        }

    # Parse the JSON data from the event body
    try:
        invoice_data = json.loads(event['body'])
    except json.JSONDecodeError:
        return {
            'statusCode': 400,
            'body': json.dumps({'message': 'Invalid JSON format'})
        }

    # Validate required fields
    required_fields = ['storeId', 'date', 'TotalAmount']
    for field in required_fields:
        if field not in invoice_data:
            return {
                'statusCode': 400,
                'body': json.dumps({'message': f'Missing required field: {field}'})
            }

    store_id = invoice_data['storeId']
    invoice_date = invoice_data['date']
    total_amount = invoice_data['TotalAmount']

    # Validate the data types (optional but recommended)
    try:
        total_amount = float(total_amount)
    except ValueError:
        return {
            'statusCode': 400,
            'body': json.dumps({'message': 'TotalAmount should be a number'})
        }

    # Connect to the RDS database
    try:
        connection = pymysql.connect(host=RDS_HOST,
                                     user=RDS_USER,
                                     password=RDS_PASSWORD,
                                     database=RDS_DB_NAME,
                                     cursorclass=pymysql.cursors.DictCursor)
    except pymysql.MySQLError as e:
        return {
            'statusCode': 500,
            'body': json.dumps({'message': f'Error connecting to RDS: {str(e)}'})
        }

    # Insert the data into the database
    try:
        with connection.cursor() as cursor:
            sql = """
                INSERT INTO Invoices (storeId, invoiceDate, TotalAmount, data)
                VALUES (%s, %s, %s, %s)
            """
            cursor.execute(sql, (store_id, invoice_date, total_amount, json.dumps(invoice_data)))
            connection.commit()
    except pymysql.MySQLError as e:
        return {
            'statusCode': 500,
            'body': json.dumps({'message': f'Error inserting data into RDS: {str(e)}'})
        }
    finally:
        connection.close()

    # Return a success response
    return {
        'statusCode': 200,
        'body': json.dumps({'message': 'Invoice data successfully stored'})
    }
```
### Step 3: Environment Variables
Set the following environment variables for the Lambda function:
• RDS_HOST: Your RDS endpoint
• RDS_USER: Your RDS username
• RDS_PASSWORD: Your RDS password
• RDS_DB_NAME: Your RDS database name
### Step 4: AWS Lambda Permissions
Ensure that your Lambda function has the necessary permissions to connect to the RDS database. This can be done by assigning the correct IAM role and security group to the Lambda function.
### Step 5: Deploy and Test
Deploy the Lambda function and test it with sample JSON data to ensure it inserts the data correctly into the RDS database.
{
"body": "{\"storeId\": \"123\", \"date\": \"2024-08-23\", \"TotalAmount\": 150.75}"
}
{
"body": "{\"storeId\": \"123\", \"date\": \"2024-08-23\", \"TotalAmount\": 150.75, \"customerName\": \"John Doe\", \"items\": [{\"itemId\": \"A1\", \"quantity\": 2, \"price\": 50.00}, {\"itemId\": \"B2\", \"quantity\": 1, \"price\": 50.75}], \"taxAmount\": 10.00, \"notes\": \"Special discount applied\"}"
}
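One way to exercise the function with the first sample payload is a direct synchronous invoke; this is a sketch assuming boto3 and a deployed function — the name "store-invoice" is an assumption, not from the steps above.

```python
# Invoke the deployed Lambda with the sample invoice payload (function name is assumed).
import json
import boto3

lambda_client = boto3.client("lambda", region_name="us-east-1")

payload = {
    "body": json.dumps({"storeId": "123", "date": "2024-08-23", "TotalAmount": 150.75})
}

response = lambda_client.invoke(
    FunctionName="store-invoice",          # assumed function name
    InvocationType="RequestResponse",      # synchronous call
    Payload=json.dumps(payload).encode(),
)
print(json.loads(response["Payload"].read()))
```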
=========
```python
import json
import pymysql
import os

# Database connection settings
RDS_HOST = os.getenv('RDS_HOST')
RDS_USER = os.getenv('RDS_USER')
RDS_PASSWORD = os.getenv('RDS_PASSWORD')
RDS_DB_NAME = os.getenv('RDS_DB_NAME')


def lambda_handler(event, context):
    # Connect to the RDS database
    try:
        connection = pymysql.connect(host=RDS_HOST,
                                     user=RDS_USER,
                                     password=RDS_PASSWORD,
                                     database=RDS_DB_NAME,
                                     cursorclass=pymysql.cursors.DictCursor)
    except pymysql.MySQLError as e:
        return {
            'statusCode': 500,
            'body': json.dumps({'message': f'Error connecting to RDS: {str(e)}'})
        }

    try:
        with connection.cursor() as cursor:
            # Query all data from the Invoices table
            sql = "SELECT * FROM Invoices"
            cursor.execute(sql)
            result = cursor.fetchall()

        # Return the result as JSON (default=str handles DATE and DECIMAL columns,
        # which json.dumps cannot serialize on its own)
        return {
            'statusCode': 200,
            'body': json.dumps(result, default=str)
        }
    except pymysql.MySQLError as e:
        return {
            'statusCode': 500,
            'body': json.dumps({'message': f'Error querying data from RDS: {str(e)}'})
        }
    finally:
        connection.close()
```
This presentation deck will explore advanced Retrieval Augmented Generation (RAG) techniques.
Slide 1: Title - Advanced RAG Techniques
What is RAG? Retrieval Augmented Generation (RAG) is a powerful technique that enhances the abilities of Large Language Models (LLMs) by integrating external knowledge from various sources. [1]
Why Advanced RAG? While basic RAG is useful, it has limitations. Advanced RAG addresses these by incorporating sophisticated methods to improve the accuracy, relevance, and context-awareness of LLM outputs.
Slide 2: Naive RAG - The Foundation
Basic Components:
Document Collection: Start with a set of text documents containing relevant information. [2]
Text Segmentation: Divide the documents into smaller, manageable chunks of text.
Vectorization: Convert the text chunks into numerical representations (vectors) using techniques like Transformer Encoder models.
Index Creation: Store the vectors in an index for efficient retrieval. [2]
Process:
A user query is received.
The query is vectorized and compared to the vectors in the index.
The most similar text chunks are retrieved.
An LLM uses these chunks as context to generate a response. [2]
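A toy sketch of the retrieval step just described, using numpy cosine similarity over made-up vectors; in practice the embeddings would come from a Transformer encoder rather than the hash-based stand-in used here.

```python
# Toy naive-RAG retrieval: embed chunks, embed the query, return the closest chunks.
# The "embedding" below is a deterministic stand-in, not a real encoder.
import numpy as np

def fake_embed(text: str, dim: int = 64) -> np.ndarray:
    rng = np.random.default_rng(abs(hash(text)) % (2**32))
    v = rng.normal(size=dim)
    return v / np.linalg.norm(v)

chunks = [
    "LangGraph supports supervisor and network architectures.",
    "Amazon Bedrock offers managed multi-agent orchestration.",
    "S3 Table Buckets were announced at re:Invent 2024.",
]
index = np.stack([fake_embed(c) for c in chunks])    # one row per chunk

def retrieve(query: str, k: int = 2) -> list[str]:
    q = fake_embed(query)
    scores = index @ q                               # cosine similarity (unit vectors)
    return [chunks[i] for i in np.argsort(scores)[::-1][:k]]

print(retrieve("How does Bedrock orchestrate agents?"))
```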
Slide 3: Challenges of Naive RAG
Missing Content: Naive RAG struggles to generate complete responses when crucial information is missing from the retrieved chunks. [1]
Reasoning Mismatch: Even with relevant information, inconsistencies or errors can occur due to difficulties in interpreting and reasoning over the retrieved content. [1]
Multimodal Data Handling: Traditional RAG, being text-centric, faces challenges in effectively understanding and processing multimodal data like images and tables. [1]
Slide 4: Advanced RAG Techniques - Enhancing Performance
Pre-Retrieval:
Query Expansion: Generate multiple queries from the user's input to increase the chances of finding all relevant documents. [3]
Step-back Prompting: Use an LLM to generate a broader, more general query from the original one, retrieving higher-level context to enhance the final answer. [4]
Retrieval:
Hybrid Search: Combine keyword-based search with vector similarity search to improve the accuracy and relevance of retrieved results. [5]
Semantic Ranking: Utilize techniques like sentence-transformer cross-encoders or external reranking services (e.g., Cohere) to prioritize retrieved documents based on their semantic similarity to the query. [6]
Post-Retrieval:
Critique and Refinement: Employ an LLM to critically evaluate retrieved content and potentially trigger additional searches if needed. [3, 7]
Reranking and Filtering: Fine-tune retrieved results using similarity scores, keywords, metadata, or re-ranking models to prioritize the most relevant and timely information. [6]
Contextualization Techniques:
Employ methods like "Direct Source Mention" or "Fuzzy Matching" to align the generated response with specific text chunks, ensuring accurate reflection of the source information. [8]
Implement Chat Engines (e.g., ContextChatEngine) to handle follow-up questions and maintain context within a dialogue. [9]
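As a rough illustration of the hybrid search idea above, the sketch below blends a keyword-overlap score with a vector-similarity score; the 50/50 weighting is arbitrary and both scoring functions are simplified stand-ins for real BM25 and embedding similarity.

```python
# Toy hybrid search: blend keyword overlap with a (stand-in) vector similarity score.
def keyword_score(query: str, doc: str) -> float:
    q, d = set(query.lower().split()), set(doc.lower().split())
    return len(q & d) / max(len(q), 1)

def vector_score(query: str, doc: str) -> float:
    # Stand-in for embedding cosine similarity: character-level Jaccard overlap.
    q, d = set(query.lower()), set(doc.lower())
    return len(q & d) / max(len(q | d), 1)

def hybrid_rank(query: str, docs: list[str], alpha: float = 0.5) -> list[str]:
    scored = [(alpha * keyword_score(query, d) + (1 - alpha) * vector_score(query, d), d)
              for d in docs]
    return [d for _, d in sorted(scored, reverse=True)]

docs = ["Bedrock agents overview", "LangGraph supervisor pattern", "S3 storage pricing"]
print(hybrid_rank("supervisor agents in LangGraph", docs))
```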
Slide 5: Chunking - A Critical Aspect of RAG
What is Chunking? Chunking involves dividing large texts into smaller, manageable units (chunks), directly impacting the efficiency of information retrieval. [10]
Impact of Chunking:
Retrieval Quality: Directly affects the relevance of retrieved chunks to the user's query. [11]
Vector Database Cost: Influences storage costs and query latency based on the number and size of chunks. [11]
LLM Performance: Impacts LLM latency and cost by determining the amount of context provided. [11]
Chunking Strategies:
Text Splitter: Dividing text based on a predefined number of characters.
Character Splitter: Splitting text at specific character delimiters (e.g., newline or space). [12]
Recursive Character Splitter: Recursively splitting text using character delimiters until chunks are below a size threshold. [12]
Sentence Splitter: Dividing text at sentence boundaries. [12]
Semantic Splitting: Using techniques like sentence transformers to split text into semantically coherent units. [12]
LLM-Based Chunking: Utilizing LLMs to divide text based on propositions, which are atomic expressions of distinct factoids within the text. [13, 14]
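A minimal sketch of the recursive character splitting strategy listed above: it tries larger separators first and falls back to smaller ones until every chunk fits under the size limit. This is a simplified reimplementation for illustration, not the LangChain class of the same name.

```python
# Simplified recursive character splitting: try big separators first, then smaller ones.
def recursive_split(text: str, max_chars: int = 200,
                    separators: tuple[str, ...] = ("\n\n", "\n", ". ", " ")) -> list[str]:
    if len(text) <= max_chars:
        return [text]
    for sep in separators:
        parts = text.split(sep)
        if len(parts) > 1:
            chunks, current = [], ""
            for part in parts:
                candidate = (current + sep + part) if current else part
                if len(candidate) <= max_chars:
                    current = candidate
                else:
                    if current:
                        chunks.append(current)
                    if len(part) <= max_chars:
                        current = part
                    else:
                        # A single part is still too long: recurse with smaller separators.
                        chunks.extend(recursive_split(part, max_chars, separators))
                        current = ""
            if current:
                chunks.append(current)
            return chunks
    # No separator worked: hard-cut the text.
    return [text[i:i + max_chars] for i in range(0, len(text), max_chars)]

sample = "Retrieval Augmented Generation enhances LLMs. " * 20
print([len(c) for c in recursive_split(sample)])
```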
Slide 6: Multimodal RAG - Expanding the Horizons
Beyond Text: Multimodal RAG extends the capabilities of RAG systems to handle and reason over various data modalities, including text, images, tables, and more. [1, 15]
Architecture:
Extract different data types separately.
Use a Vision-Language Model (VLM) to generate text summarizations from the data.
Embed both the text summaries and the raw data into a vector database.
Store the raw unstructured data in a document store.
Upon receiving a query, an LLM retrieves relevant vectors from both the vector database and document store to generate a response. [16]
Benefits:
Enhanced Content Comprehension: Enables the system to understand and process queries involving different modalities. [17]
Comprehensive Answers: Facilitates the generation of multimodal answers, combining textual, tabular, and visual information in a unified manner. [15]
Improved Accuracy and Insights: Combining insights from different modalities leads to more accurate and insightful responses. [17]
Slide 7: Evaluating RAG Performance
Key Metrics:
Relevance of Retrieved Context: Assesses how well the retrieved information aligns with the user's query. [18]
Groundedness of Answers: Measures how well the generated responses are supported by the retrieved context. [18]
Overall Relevance of Answers: Evaluates the overall coherence and relevance of the LLM's output to the user's question. [18]
Advanced Evaluation:
Golden Dataset: Use a curated set of questions with approved answers, metadata, and source document references as a benchmark for evaluating system accuracy. [19]
Assessment Pipeline: Develop a system for efficiently analyzing user feedback and the reasoning behind the AI's responses to identify areas for improvement. [20]
Harms Modeling: Implement methodologies for predicting and mitigating potential risks associated with the RAG system, such as privacy breaches or biases in generated content. [21]
Slide 8: Conclusion
The Future of RAG: Advanced RAG techniques represent a significant step forward in enhancing the capabilities of LLMs, pushing the boundaries of their potential applications. [18, 22]
Continued Research and Development: The field of RAG is constantly evolving, with ongoing research and development efforts focused on further improving the efficiency, accuracy, and adaptability of RAG systems. [22]
Unlocking New Possibilities: As RAG technology progresses, it unlocks new opportunities for more sophisticated, interactive, and impactful applications in various domains. [18, 22]
This structure provides a comprehensive overview of advanced RAG techniques, highlighting key concepts and practical implementations.
https://medium.com/@krtarunsingh/advanced-rag-techniques-unlocking-the-next-level-040c205b95bc
https://www.pinecone.io/learn/advanced-rag-techniques/
https://www.rungalileo.io/blog/mastering-rag-advanced-chunking-techniques-for-llm-applications
https://learn.microsoft.com/en-us/azure/developer/ai/advanced-retrieval-augmented-generation
https://docs.aws.amazon.com/sagemaker/latest/dg/data-wrangler.html
https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-migrate.html
https://docs.aws.amazon.com/sagemaker/latest/dg/studio-updated-ui.html
https://docs.aws.amazon.com/sagemaker/latest/dg/studio-ui.html
Who are the people joining (AI ML / Infra ?)
Should I go with new GUI
https://docs.aws.amazon.com/id_id/iot/latest/developerguide/aws-iot-how-it-works.html
https://aws.amazon.com/blogs/architecture/building-event-driven-architectures-with-iot-sensor-data/
https://docs.aws.amazon.com/bedrock/latest/userguide/agents-how.html
Here are step-by-step instructions for installing and running the Bedrock Agents demo from the GitHub repository you provided:
1. Clone the repository:
```
git clone https://github.com/trevorspires/Bedrock-Agents-Demo-Final.git
cd Bedrock-Agents-Demo-Final
```
2. Install the required Python packages:
```
pip install -r requirements.txt
```
3. Set up AWS credentials with permissions to access Bedrock. You can do this by configuring the AWS CLI or setting environment variables.
4. Create an S3 bucket to store the knowledge base documents. Upload the provided PDF files (fomcminutes20230726.pdf, fomcminutes20230920.pdf, fomcminutes20231101.pdf) to this S3 bucket.
5. Create another S3 bucket to store artifacts. Upload the provided ActionSchema.json file to this bucket.
6. Create a Lambda function named "bedrock-agent-internet-search". Copy the provided Python code into this Lambda function. Configure the Lambda with 4048MB memory, 1024MB ephemeral storage, and 1 minute timeout. Deploy the Lambda function.
7. Apply a resource policy to the Lambda to grant Bedrock agent access. Use the example resource policy provided, replacing the ARN placeholder with the actual ARN once the agent is created.
8. In the Bedrock console, go to Agents and select "Create Agent". Provide an agent name like "WebscrapeAgent". Select the Anthropic Claude Instant V1 model.
9. Add instructions by creating a prompt that defines the rules of operation for the agent. Use the example prompt provided.
10. Add the action group, selecting the "bedrock-agent-internet-search" Lambda function and the ActionSchema.json file from the artifacts S3 bucket.
11. Review and create the agent. Once created, deploy the agent by creating an alias.
12. Navigate to the Streamlit_App directory:
```
cd Streamlit_App
```
13. Open the InvokeAgent.py file and update the agentId and agentAliasId variables with the appropriate values from the agent you created.
14. Run the Streamlit app:
```
streamlit run app.py
```
15. Access the app in your web browser at the provided URL. You can now interact with the Bedrock agent through the Streamlit interface.
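Step 13 can also be checked outside Streamlit with a direct call to the agent runtime. The sketch below assumes boto3 with Bedrock access and uses placeholder values for agentId and agentAliasId.

```python
# Call the deployed Bedrock agent directly (agent IDs below are placeholders).
import uuid
import boto3

client = boto3.client("bedrock-agent-runtime", region_name="us-east-1")

response = client.invoke_agent(
    agentId="AGENT_ID_PLACEHOLDER",
    agentAliasId="AGENT_ALIAS_ID_PLACEHOLDER",
    sessionId=str(uuid.uuid4()),
    inputText="Summarize the July 2023 FOMC minutes.",
)

# The response is streamed as chunks of bytes; collect the completion text.
completion = ""
for event in response["completion"]:
    chunk = event.get("chunk", {})
    completion += chunk.get("bytes", b"").decode("utf-8")
print(completion)
```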
To clean up the resources when done:
- Delete the files from the S3 buckets
- Delete the Lambda functions
- Delete the Bedrock agent from the Bedrock console
Let me know if you have any other questions!
Citations:
[1] https://github.com/trevorspires/Bedrock-Agents-Demo-Final
[2] https://github.com/build-on-aws/amazon-bedrock-agents-quickstart
[3] https://www.youtube.com/watch?v=_CzSJa-kH_8
[4] https://www.youtube.com/watch?v=q09A06uOCQg
[5] https://github.com/mavi888/bedrock-agent-lambda
[6] https://github.com/aws-samples/agentsforbedrock-retailagent
[7] https://github.com/aws-samples/generative-ai-amazon-bedrock-langchain-agent-example
[8] https://www.youtube.com/watch?v=6O9DqCrInvw
[9] https://github.com/aws-samples/amazon-bedrock-samples
[10] https://github.com/aws-samples/amazon-bedrock-serverless-prompt-chaining
[11] https://www.youtube.com/watch?v=qS1tTHMWq90
[12] https://github.com/build-on-aws/bedrock-agents-streamlit
[13] https://www.youtube.com/watch?v=kwkaBrK_-Bs
[14] https://github.com/build-on-aws/bedrock-agents-webscraper
[15] https://www.youtube.com/watch?v=obgsF9ewR9s
[16] https://github.com/aws-samples/amazon-bedrock-workshop
[17] https://www.youtube.com/watch?v=32D7NJK9QIk
[18] https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-agent_example_bedrock-agent_GettingStartedWithBedrockAgents_section.html
[19] https://www.youtube.com/watch?v=ElX-W3dEwq0
[20] https://github.com/jossai87/bedrock-agents-streamlit
javascript:(function(){const textToCopy='Act like document reviewer. Summarize the main points of the current page in simple language . Highlight ( Font bold and red) who , why, when , statistics and how part of summarization.At the end explain the key concept in simple language';navigator.clipboard.writeText(textToCopy).then(()=>{alert('Text copied to clipboard: '+textToCopy);}).catch((error)=>{console.error('Error copying text: ',error);});})();
According to the provided search results, the pricing for Claude 3 Sonnet on Amazon Bedrock is as follows:
On-Demand Pricing:
- $0.008 per 1,000 input tokens
- $0.024 per 1,000 output tokens[2]
Provisioned Throughput Pricing (hourly price per model unit):
- $80.00 for 1-month commitment
- $44.00 for 6-month commitment[1][2]
So for example, a request to Claude 3 Sonnet with 11K input tokens and 4K output tokens would cost:
- On-Demand: (11K / 1000 * $0.008) + (4K / 1000 * $0.024) = $0.184
- Provisioned Throughput (1 model unit, 1 month): $80 * 24 hours * 31 days = $59,520 per month[2]
Compared to other models on Amazon Bedrock, Claude 3 Sonnet is among the most expensive options[5]. The cheaper Claude Instant model is $39.60/hour for a 1-month commitment, while the previous flagship Claude 2 model is $63/hour[2].
Pricing for the higher-end Claude 3 Opus and lower-end Claude 3 Haiku models has not been announced yet, as they are "coming soon" to Amazon Bedrock[3][4].
Citations:
[1] https://aws.amazon.com/marketplace/pp/prodview-mdluhdii5lkfw
[2] https://aws.amazon.com/bedrock/pricing/
[3] https://aws.amazon.com/bedrock/claude/
[4] https://www.aboutamazon.com/news/aws/amazon-bedrock-anthropic-ai-claude-3
[5] https://venturebeat.com/ai/amazon-adds-claude-3-beats-gpt-4-to-bedrock/
Based on the pricing information provided in the search results, there are two main pricing models for using Claude 3 Sonnet on Amazon Bedrock:
1. On-Demand Pricing:
- You pay per request based on the number of input and output tokens
- $0.008 per 1,000 input tokens
- $0.024 per 1,000 output tokens
- Example: A request with 11K input tokens and 4K output tokens would cost (11K / 1000 * $0.008) + (4K / 1000 * $0.024) = $0.184[2]
2. Provisioned Throughput Pricing:
- You commit to purchasing a certain number of "model units" for a 1-month or 6-month term
- Each model unit has an hourly price based on the commitment term
- For Claude 3 Sonnet, it's $80/hour for 1-month or $44/hour for 6-months per model unit
- Example: 1 model unit for 1 month would cost $80 * 24 hours * 31 days = $59,520 per month[2][4]
The key differences are:
- On-Demand has no upfront commitment, you only pay for what you use on a per-request basis
- Provisioned Throughput requires committing to a certain capacity for 1 or 6 months, but provides lower per-hour rates
- Provisioned Throughput is suitable if you have predictable, steady workloads and want to optimize costs
- On-Demand offers more flexibility for variable or unpredictable workloads
So in summary, On-Demand is pay-as-you-go per request, while Provisioned Throughput involves committing to a fixed capacity to get discounted hourly rates. The choice depends on your workload patterns and cost optimization goals.
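The arithmetic above fits in a few lines; the rates are copied from the cited pricing page and may change, so treat them as a snapshot rather than current pricing.

```python
# Compare on-demand vs. provisioned-throughput cost for Claude 3 Sonnet on Bedrock,
# using the per-token and per-hour rates quoted above (snapshot, subject to change).
INPUT_PER_1K = 0.008        # USD per 1,000 input tokens (on-demand)
OUTPUT_PER_1K = 0.024       # USD per 1,000 output tokens (on-demand)
HOURLY_1_MONTH = 80.00      # USD per model unit per hour, 1-month commitment

def on_demand_cost(input_tokens: int, output_tokens: int) -> float:
    return input_tokens / 1000 * INPUT_PER_1K + output_tokens / 1000 * OUTPUT_PER_1K

def provisioned_monthly_cost(model_units: int = 1, days: int = 31) -> float:
    return model_units * HOURLY_1_MONTH * 24 * days

print(f"on-demand example: ${on_demand_cost(11_000, 4_000):.3f}")        # $0.184
print(f"provisioned, 1 unit, 31 days: ${provisioned_monthly_cost():,.0f}")  # $59,520
```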
Citations:
[1] https://www.linkedin.com/pulse/getting-started-amazon-bedrock-kaushik-j-fl8vc
[2] https://aws.amazon.com/bedrock/pricing/
[3] https://www.enterpriseai.news/2017/01/25/heresies-new-hpc-cloud-universe/
[4] https://aws.amazon.com/marketplace/pp/prodview-mdluhdii5lkfw
Here is a simple explanation of the key parameters used in large language models (LLMs):
## Temperature
Temperature controls the randomness and creativity of the model's output. It is a value between 0 and 1.
- Lower temperature (e.g. 0.2) makes the output more focused, deterministic and conservative. The model will choose words with higher probabilities.
- Higher temperature (e.g. 0.8) makes the output more random, creative and diverse. The model will consider words with lower probabilities too.
For example, with the prompt "The cat sat on the", a low temperature might complete it as "The cat sat on the mat." A high temperature might generate something more unusual like "The cat sat on the glistening, purple mushroom."[3]
## Top-p (nucleus sampling)
Top-p sets a probability threshold p (e.g. 0.9) and only considers the smallest set of words whose cumulative probability exceeds p.
- Lower top-p (e.g. 0.5) limits the word choices to the most probable words that cumulatively have 50% probability mass. This makes the output more focused.
- Higher top-p (e.g. 0.95) includes more possible word choices covering 95% probability. This allows for more diversity.
For example, for the prompt "I put my money in the", top-p of 0.5 might generate common completions like "bank" or "wallet", while 0.95 might also consider less likely words like "blender" or "aquarium".[4]
## Top-k
Top-k sampling limits the word choices to the k most likely next words. For example:
- With top-k of 3, the model will only consider the 3 most probable words at each step. This heavily restricts the output.
- With top-k of 100, the model will consider the top 100 most probable words. This allows more possibilities.
So for the prompt "I enjoy eating", top-k=3 might only consider common foods like "pizza", "sushi", "tacos", while top-k=100 could generate rarer foods too like "escargot" or "durian".[3]
## Max Tokens
Max tokens sets the maximum length of the model's output, in terms of the number of tokens (words or word pieces).
For example, if max tokens is set to 20, the model will generate at most 20 tokens before stopping. This prevents the model from generating overly long outputs.[4]
In summary, temperature, top-p and top-k control the randomness and diversity of word choices, while max tokens limits the output length. Adjusting these parameters allows developers to fine-tune the behavior of language models for different applications.[3][4]
Citations:
[1] https://developers.google.com/machine-learning/resources/intro-llms
[2] https://en.wikipedia.org/wiki/Large_language_model
[3] https://www.youtube.com/watch?v=PAbZRGGYNyM
[4] https://www.linkedin.com/pulse/parameters-llm-models-simple-explanation-gaurang-desai-kabfe
[5] https://aws.amazon.com/what-is/large-language-model/
Here's an example of how temperature, top-p, and top-k can work together when generating text with a large language model:
Let's say we have the prompt "The quick brown fox" and want the model to complete the sentence. The model outputs a probability distribution over the next possible words:
jumped: 0.50
runs: 0.30
leaps: 0.15
eats: 0.05
Now let's see how the different parameters affect the word choice:
1. Temperature:
- With low temperature (e.g., 0.2), the model becomes very conservative and always picks the most likely word, "jumped".
- With high temperature (e.g., 1.0), the probabilities become more evened out, giving "leaps" and "eats" a better chance of being selected.
2. Top-k:
- If we set top-k to 2, the model will only consider the top 2 most probable words: "jumped" and "runs".
- "leaps" and "eats" have no chance of being selected, regardless of temperature.
3. Top-p:
- If we set top-p to 0.8, the model will consider the smallest set of words whose cumulative probability exceeds 0.8.
- In this case, that set is {"jumped", "runs", "leaps"}, as their total probability is 0.95.
- "eats" is excluded since the top-p threshold has already been met.
So if we use temperature=1.0, top-k=3, top-p=0.8:
- Temperature of 1.0 leaves the raw probabilities unchanged (values above 1.0 would flatten them further).
- Top-k=3 keeps {"jumped", "runs", "leaps"}, the same set top-p allows, so it adds no extra restriction here.
- Top-p excludes the least probable word "eats".
The model would likely output something like:
"The quick brown fox jumped." (most probable)
"The quick brown fox runs." (somewhat probable)
"The quick brown fox leaps." (less probable)
But it would never output:
"The quick brown fox eats." (excluded by top-p)
So in summary, temperature increases randomness, top-k limits to the k most probable words, and top-p chooses the smallest set exceeding the probability threshold. Using them together gives you fine-grained control over the model's output.[3][4]
Citations:
[1] https://peterchng.com/blog/2023/05/02/token-selection-strategies-top-k-top-p-and-temperature/
[2] https://community.openai.com/t/temperature-top-p-and-top-k-for-chatbot-responses/295542
[4] https://datasciencedojo.com/blog/llm-parameters/
[5] https://www.youtube.com/watch?v=_W3m8wLYuJw
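To make this concrete, here is a minimal Python sketch that replays the walkthrough above on the same toy distribution. It follows the common convention of applying top-k and top-p to the same temperature-scaled distribution; real model implementations may differ in detail.

```python
import random

# Toy re-implementation of the worked example above: apply temperature,
# top-k, and top-p to the next-word distribution for "The quick brown fox".
probs = {"jumped": 0.50, "runs": 0.30, "leaps": 0.15, "eats": 0.05}

def apply_temperature(probs, temperature):
    """Rescale probabilities; T < 1 sharpens, T > 1 flattens the distribution."""
    scaled = {w: p ** (1.0 / temperature) for w, p in probs.items()}
    total = sum(scaled.values())
    return {w: p / total for w, p in scaled.items()}

def allowed_by_top_k(probs, k):
    """The k most probable words."""
    return set(sorted(probs, key=probs.get, reverse=True)[:k])

def allowed_by_top_p(probs, p_threshold):
    """The smallest set of words whose cumulative probability exceeds p."""
    allowed, cumulative = set(), 0.0
    for w in sorted(probs, key=probs.get, reverse=True):
        allowed.add(w)
        cumulative += probs[w]
        if cumulative > p_threshold:
            break
    return allowed

scaled = apply_temperature(probs, temperature=1.0)
keep = allowed_by_top_k(scaled, k=3) & allowed_by_top_p(scaled, p_threshold=0.8)
filtered = {w: scaled[w] for w in keep}
total = sum(filtered.values())
filtered = {w: p / total for w, p in filtered.items()}
print(filtered)   # "eats" is excluded, matching the walkthrough above

word = random.choices(list(filtered), weights=list(filtered.values()))[0]
print("The quick brown fox", word + ".")
```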
When choosing temperature, top-p, and top-k values for a task like call center transcription summarization, the goal is to generate focused, coherent and relevant summaries while avoiding hallucination or irrelevant details. Here are some guidelines:
Temperature:
- Use a lower temperature like 0.2-0.5 to make the output more focused and deterministic.[1]
- Higher temperatures can introduce more randomness and hallucination, which is undesirable for factual summarization.[2]
Top-p:
- Set top-p to a lower value like 0.5-0.8 to focus on the most probable words and phrases.[1][2]
- This will reduce diversity but keep the summary on-topic and faithful to the source transcript.
- Avoid setting top-p too low (e.g. 0.2) as it may lead to repetitive output.
Top-k:
- Top-k is generally less useful than top-p for controlling output relevance.[1]
- If using top-k, a moderate value like 50 can eliminate very low probability words while still allowing for some diversity.[1]
- Avoid setting top-k too low, as it can lead to repetitive or generic summaries.
In general, it's best to only modify either temperature or top-p, not both together.[2] So for call center summarization, I would recommend:
- Temperature: 0.3-0.5
- Top-p: 0.6-0.8
- Top-k: Use default (e.g. 50) or don't set
This combination will generate summaries that extract the most salient information from the transcript in a coherent way, without introducing many irrelevant or hallucinated details. Some experimentation may be needed to find the optimal values for the specific transcripts and desired summary style.
The key is to find a balance between diversity (to cover the key points) and focus (to stay faithful to the source). Err on the side of lower randomness for factual summarization tasks like this.[1][2]
Citations:
[2] https://community.openai.com/t/temperature-top-p-and-top-k-for-chatbot-responses/295542
[3] https://community.openai.com/t/a-better-explanation-of-top-p/2426
[5] https://peterchng.com/blog/2023/05/02/token-selection-strategies-top-k-top-p-and-temperature/
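As a rough illustration of how these settings could be applied, here is a hedged boto3 sketch that calls Claude 3 Sonnet on Bedrock with conservative values in the recommended range. The model ID and the Anthropic Messages request shape reflect the Bedrock documentation at the time of writing and should be verified against the current docs.

```python
import json
import boto3

# Hedged sketch: invoke Claude 3 Sonnet on Bedrock with the conservative
# sampling settings suggested above for call-center summarization.
# Model ID and request shape are assumptions -- check the current Bedrock docs.

bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")

def summarize_transcript(transcript: str) -> str:
    body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 400,        # cap the summary length
        "temperature": 0.3,       # low randomness for factual output
        "top_p": 0.7,             # stay on the most probable phrasing
        "messages": [{
            "role": "user",
            "content": "Summarize this call-center transcript in 5 bullet points:\n\n" + transcript,
        }],
    }
    response = bedrock.invoke_model(
        modelId="anthropic.claude-3-sonnet-20240229-v1:0",  # assumed model ID
        body=json.dumps(body),
        contentType="application/json",
        accept="application/json",
    )
    payload = json.loads(response["body"].read())
    return payload["content"][0]["text"]
```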
I have built a SageMaker notebook that takes 100 call-center transcripts hosted on S3 and summarizes each one four different ways using four different foundation models. I am planning to write a blog post about it. Give me ideas.
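For reference, a rough sketch of the notebook's outer loop: pull each transcript from S3 and fan it out across several model IDs. The bucket name and model IDs are placeholders, and summarize_with() stands in for a per-model invoke helper like the Claude sketch above.

```python
import boto3

# Rough sketch of the notebook's outer loop: read each transcript from S3 and
# summarize it with several foundation models for side-by-side comparison.
s3 = boto3.client("s3")
BUCKET = "my-call-center-transcripts"           # placeholder bucket name
MODEL_IDS = [
    "anthropic.claude-3-sonnet-20240229-v1:0",  # assumed model IDs -- swap in
    "anthropic.claude-instant-v1",              # whichever four models the
    "amazon.titan-text-express-v1",             # notebook actually targets
    "ai21.j2-ultra-v1",
]

def summarize_with(model_id: str, transcript: str) -> str:
    """Placeholder for a per-model invoke helper (each provider has its own body format)."""
    ...

results = {}
for obj in s3.list_objects_v2(Bucket=BUCKET).get("Contents", []):
    key = obj["Key"]
    transcript = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read().decode("utf-8")
    results[key] = {model_id: summarize_with(model_id, transcript) for model_id in MODEL_IDS}
```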
Choosing ideal values for top_p, top_k, and temperature in summarization tasks with LLMs on AWS Bedrock depends on several factors, including the specific model you're using and your desired summary characteristics. Here's a breakdown of each parameter and how they influence your summaries:
Top-p:
Meaning: Imagine a bag filled with all possible next words. top_p controls how big the bag is: it keeps only the smallest set of most likely words whose cumulative probability reaches p.
Impact:
Lower top_p (e.g., 0.8) restricts the model to a smaller bag of high-probability words, producing safer, more focused summaries that stick close to the source.
Higher top_p (e.g., 0.95) widens the bag to include less likely words, allowing more varied and creative phrasing at the risk of drifting from the source.
Top-k:
Meaning: Similar to top_p, but instead of choosing a percentage, you pick a fixed number (k) of the most likely words for each step in the summary generation.
Impact:
Lower top_k (e.g., 10) makes the summary very focused on the absolute most likely words, potentially missing key points even if they aren't in the top 10.
Higher top_k (e.g., 50) allows for broader word selection and potentially captures more information, but it might not be as concise.
Temperature:
Meaning: Think of it as a dial controlling randomness. Higher temperature injects more "surprise" into the word choices, potentially leading to more interesting summaries.
Impact:
Lower temperature (e.g., 0.5) leads to very conservative summaries that closely follow the original text, potentially sacrificing fluency and creativity.
Higher temperature (e.g., 1.0) can generate more engaging and varied summaries, but they might stray further from the original text and potentially introduce factual errors.
So, what are the ideal values?
Unfortunately, there's no one-size-fits-all answer. It depends on:
Model: Different models have different "personalities" and respond differently to these parameters. Experiment to see what works best for your chosen model.
Desired summary style: Do you need factual accuracy, conciseness, or more engaging language? Each parameter affects these qualities differently.
Task and audience: Are you summarizing for experts who need all the details, or for a general audience looking for a quick overview? Adjust parameters accordingly.
General starting points:
Top-p: Try starting between 0.8 and 0.95 and adjust based on desired conciseness and creativity.
Top-k: Start with a moderate value like 20-50 and see how it affects summary inclusivity versus focus.
Temperature: Begin with 0.7-0.8 and tweak towards higher values for more engaging summaries, but be cautious of sacrificing accuracy.
Remember: Experimentation is key! Try different combinations of these parameters and see what produces the best summaries for your specific needs.
Additional tips:
Look for documentation or examples related to your specific LLM model on AWS Bedrock. They might offer starting points or best practices for summarization tasks.
Consider other parameters, such as a maximum output length (e.g. max_tokens / max_length, depending on the model) to control summary length, and any sampling switches the model exposes.
Evaluate the quality of your summaries using human judgment or automatic evaluation metrics relevant to your task.
I hope this explanation helps you optimize your summarization results on AWS Bedrock!
https://github.com/aws-samples/amazon-bedrock-workshop/tree/main/02_Summarization
https://www.asapp.com/blog/a-contact-center-case-study-about-call-summarization-strategies
Requirements:
What do you want to summarize? Documents, call scripts, legal papers, medical reports, customer reviews?
Languages: English only? Multilingual?
Any model preference, based on what is already in use / open source / etc.?
What is the expected outcome? Evaluation criteria:
Completeness: Does the summary encapsulate all the key points from the article?
Clarity: Can readers easily grasp the summarized content? Is jargon avoided?
Conciseness: Is the summary concise, eliminating unnecessary fluff? (Number of sentences)
Structure: Does the summary follow a logical flow and organization?
Relevance: Does the summary keep its focus on the primary theme of the article?
Cost / ROI for this feature
What productivity gain is expected?
Training?
Can this feature be optimized for more use cases to reduce cost?
Approach
Fine Tuning / Few shot training
Framework and test benchmark, easy to evaluate (see the evaluation sketch after this list)
Scalable
Flexible for future adjustments
Test
Verify
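For the "easy to evaluate" point, one option is automatic scoring against human reference summaries; a minimal sketch with the rouge-score package follows. ROUGE only approximates completeness and conciseness; clarity, structure, and relevance still need human review or an LLM-as-judge pass, and the reference summaries are assumed to exist.

```python
from rouge_score import rouge_scorer   # pip install rouge-score

# Minimal sketch for the "easy to evaluate" requirement: score a generated
# summary against a human-written reference with ROUGE. This approximates
# completeness/conciseness only; other criteria still need human judgment.
scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)

def score_summary(reference: str, candidate: str) -> dict:
    """Return ROUGE-1 and ROUGE-L F1 scores for one candidate summary."""
    scores = scorer.score(reference, candidate)
    return {name: round(s.fmeasure, 3) for name, s in scores.items()}

print(score_summary(
    "Customer called about a duplicate charge; agent issued a refund.",
    "The agent refunded a duplicate charge reported by the customer.",
))
```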
Key Words and Phrases:
1. Enterprise Architecture - Reflects a holistic view of the organization’s IT landscape.
2. Digital Transformation - Emphasizes your experience with modernizing legacy systems and processes.
3. Scalability - Important for discussing how solutions can grow with the business.
4. Cloud Strategy - Critical in today’s IT environment; focus on multi-cloud or hybrid cloud solutions.
5. Security and Compliance - Essential, especially in the financial sector.
6. Agility - Use to highlight your ability to adapt to changing business needs.
7. Data Governance - Focuses on data quality, security, and compliance.
8. Innovation and Continuous Improvement - Reflects a forward-thinking approach.
9. Stakeholder Management - Demonstrates your ability to align technical solutions with business goals.
10. Legacy Modernization - Shows your experience in upgrading or replacing outdated systems.
11. Cross-Functional Collaboration - Highlights teamwork across different departments.
12. Cost Optimization - Relevant for driving efficiency without compromising on quality.
13. Risk Management - Vital for ensuring that new solutions don’t introduce unforeseen risks.
14. Customer-Centric Design - Focuses on creating solutions that meet end-user needs.
15. Regulatory Compliance - Particularly crucial in the financial industry.
Sample Response:
Interview Question: Can you describe a time when you led a large-scale digital transformation initiative?
Response:
“In my current role as a Senior Solution Architect, I spearheaded a comprehensive digital transformation initiative aimed at modernizing our enterprise architecture. This project was critical in addressing scalability challenges as our business expanded. The key to success was developing a robust cloud strategy, leveraging a hybrid cloud environment that optimized both cost and performance.
Security and compliance were at the forefront of our design, particularly given the stringent regulatory environment we operate in. I worked closely with cross-functional teams, including IT, operations, and legal, to ensure our solutions met all data governance and compliance requirements.
Throughout the project, I maintained constant communication with key stakeholders to align our technical roadmap with the broader business strategy. This not only facilitated buy-in but also ensured that our transformation efforts were tightly integrated with ongoing business objectives. The result was a more agile, scalable, and secure infrastructure that supported our organization’s growth and innovation goals.
This experience underscored the importance of agility and continuous improvement in today’s fast-evolving technological landscape, especially in a sector as dynamic as finance.”
Explanation:
• Enterprise Architecture and Scalability establish your strategic thinking.
• Cloud Strategy and Security and Compliance demonstrate technical depth and awareness of financial industry requirements.
• Stakeholder Management and Cross-Functional Collaboration showcase leadership and teamwork.
• Agility and Continuous Improvement signal your adaptability and forward-thinking mindset.
Using these keywords and structured responses will help you convey the right level of expertise and strategic thinking required for a Principal Architect role at JPMC.
1. Enterprise Architecture
Situation:
At a global financial services firm, our enterprise architecture had become fragmented due to the rapid adoption of cloud services by different departments. This created silos, increased operational costs, and posed security risks.
Task:
I was tasked with unifying the enterprise architecture to ensure consistency, reduce costs, and improve security across the organization.
Action:
I initiated an assessment of the existing architecture using AWS Well-Architected Tool and TOGAF framework. I then designed a new enterprise architecture blueprint that standardized cloud services across the organization, leveraging AWS Control Tower for governance and Terraform for Infrastructure as Code (IaC).
Result:
The new architecture reduced cloud spend by 15% annually, improved operational efficiency by 20%, and significantly enhanced security posture, evidenced by a 30% reduction in security incidents related to misconfigured cloud resources.
2. Digital Transformation
Situation:
A leading insurance provider was struggling with outdated legacy systems that hindered their ability to quickly roll out new products and services in response to market demands.
Task:
I was assigned to lead the digital transformation initiative to modernize the legacy systems and improve time-to-market.
Action:
I led the migration of core applications to a microservices architecture on Azure Kubernetes Service (AKS). Using Azure DevOps, I implemented CI/CD pipelines that automated the deployment process. We also introduced Azure API Management to facilitate seamless integration between legacy systems and new microservices.
Result:
The transformation reduced the time-to-market for new products by 40% and increased system reliability by 25%, with a 50% reduction in downtime during peak processing times.
3. Scalability
Situation:
A fintech startup experienced rapid user growth, which led to performance bottlenecks in their payment processing system hosted on a monolithic architecture.
Task:
I was tasked with redesigning the system architecture to handle the growing user base and transaction volume without compromising performance.
Action:
I led the re-architecture of the system to a serverless architecture using AWS Lambda, DynamoDB, and API Gateway. I also implemented AWS Auto Scaling for parts of the system that still required EC2 instances.
Result:
The new architecture handled a 300% increase in transaction volume with no performance degradation. Additionally, the cost per transaction decreased by 20% due to the pay-as-you-go nature of serverless services.
4. Cloud Strategy
Situation:
A multinational bank was using a mix of on-premises and cloud-based solutions, but lacked a coherent cloud strategy, resulting in inefficiencies and compliance risks.
Task:
I was brought in to develop a comprehensive cloud strategy that would optimize costs, improve agility, and ensure compliance with regulatory standards.
Action:
I conducted a thorough assessment using CloudHealth by VMware to analyze cloud spend and usage patterns. I then developed a hybrid cloud strategy, prioritizing workloads based on business criticality and data sensitivity, and implemented Azure Policy and AWS Organizations for governance.
Result:
The new strategy led to a 25% reduction in cloud costs, improved compliance by 35%, and reduced provisioning time for new environments from weeks to days, enhancing overall agility.
5. Security and Compliance
Situation:
A healthcare provider faced increasing concerns over security and compliance, particularly with the sensitive patient data they were storing and processing in the cloud.
Task:
I was responsible for enhancing the security and compliance posture of their cloud infrastructure to meet HIPAA requirements.
Action:
I led the implementation of AWS Security Hub and Azure Security Center to continuously monitor security threats and enforce compliance. I also deployed AWS KMS and Azure Key Vault for encryption key management and implemented a robust IAM strategy using Azure AD and AWS IAM.
Result:
The organization achieved HIPAA compliance, reducing audit findings by 40%, and saw a 50% decrease in potential security threats due to enhanced monitoring and encryption.
6. Agility
Situation:
A retail company needed to rapidly adapt its e-commerce platform to accommodate seasonal spikes in traffic, but their existing infrastructure lacked the agility to scale effectively.
Task:
I was tasked with enhancing the agility of the infrastructure to support these fluctuating demands.
Action:
I migrated the e-commerce platform to Google Cloud Platform (GCP) using Google Kubernetes Engine (GKE) for container orchestration. I also implemented Terraform for IaC to enable rapid provisioning and Jenkins for CI/CD to ensure quick deployment of updates.
Result:
The company was able to scale its platform by 200% during peak seasons with zero downtime, and deployment times for new features were reduced by 60%, significantly improving their competitive edge.
7. Data Governance
Situation:
A financial institution was struggling with inconsistent data governance practices, leading to data quality issues and compliance risks.
Task:
I was asked to establish a robust data governance framework to ensure data integrity, quality, and compliance across cloud environments.
Action:
I implemented AWS Glue and Azure Data Catalog for automated data discovery and cataloging. I also established data quality rules and policies using Informatica for data governance, and integrated these with Azure Purview for end-to-end visibility of data lineage.
Result:
Data quality improved by 30%, compliance risks were reduced by 25%, and the time spent on data-related issues by the operations team decreased by 40%, allowing them to focus on more strategic tasks.
8. Innovation and Continuous Improvement
Situation:
A global logistics company wanted to innovate its route optimization algorithms to reduce fuel consumption and delivery times.
Task:
I was tasked with leading the development of a new, cloud-based solution that could dynamically optimize delivery routes.
Action:
I designed and deployed a solution using AWS SageMaker to build machine learning models for real-time route optimization. I integrated the models with AWS IoT Core to collect and process real-time data from delivery vehicles, and used AWS Step Functions to automate the workflow.
Result:
The solution reduced fuel consumption by 15% and decreased average delivery times by 20%. The system continues to improve as more data is fed into the models, showcasing the value of continuous innovation.
9. Stakeholder Management
Situation:
A large-scale cloud migration project at a telecommunications company was facing resistance from key stakeholders due to concerns over cost and disruption to ongoing operations.
Task:
I was responsible for managing stakeholder expectations and ensuring alignment with the migration strategy.
Action:
I facilitated regular workshops and meetings with stakeholders from IT, finance, and operations to address their concerns. I used Microsoft Power BI to create dashboards that transparently showed the migration progress, cost savings, and performance improvements. I also implemented a phased migration plan using AWS Migration Hub to minimize disruption.
Result:
Stakeholder buy-in increased significantly, with over 90% approval for the migration strategy. The project was completed 10% under budget and ahead of schedule, leading to a 25% reduction in operational costs post-migration.
10. Legacy Modernization
Situation:
A large government agency relied on a 30-year-old COBOL-based system that was becoming increasingly difficult and expensive to maintain.
Task:
I was tasked with modernizing this legacy system to a more scalable and maintainable architecture.
Action:
I led the migration to a modern microservices architecture using AWS Fargate for serverless container management and Amazon RDS for database services. We used Micro Focus to recompile COBOL applications to run on modern platforms and AWS Lambda for integrating legacy services with new microservices.
Result:
The modernization reduced maintenance costs by 40%, improved system performance by 50%, and extended the system’s lifespan by at least 10 years, all while maintaining data integrity and service continuity.
11. Cross-Functional Collaboration
Situation:
During a cloud migration project at a global bank, there were significant communication gaps between the development, operations, and security teams, causing delays and security vulnerabilities.
Task:
I was tasked with improving cross-functional collaboration to ensure the success of the cloud migration.
Action:
I introduced DevSecOps practices, integrating security into the CI/CD pipeline using Jenkins and SonarQube for code quality and security checks. I also facilitated regular cross-team stand-ups and implemented Confluence as a centralized platform for documentation and collaboration.
Result:
Collaboration improved significantly, with security issues decreasing by 30% and project delays being cut in half. The migration was completed successfully with all teams aligned on the project’s goals.
12. Cost Optimization
Situation:
A large retail chain was facing escalating cloud costs, particularly during peak sales periods.
Task:
I was asked to optimize cloud spending while maintaining performance and availability.
Action:
I performed a thorough cost analysis using AWS Cost Explorer and Azure Cost Management. I identified underutilized resources and implemented auto-scaling policies using AWS Auto Scaling and Azure VM Scale Sets. Additionally, I used Spot Instances and Reserved Instances to reduce compute costs.
Result:
The optimizations resulted in a 35% reduction in overall cloud spending, without compromising on performance or availability. During peak sales periods, the auto-scaling policies allowed the retail chain to handle a 150% increase in traffic with zero downtime, leading to record sales without exceeding the budget.
13. Risk Management
Situation:
A financial services company was planning to migrate its core banking applications to the cloud but faced significant concerns regarding potential risks, including data breaches, compliance violations, and service outages.
Task:
I was responsible for developing a risk management strategy that would mitigate these concerns and ensure a smooth migration.
Action:
I conducted a comprehensive risk assessment using AWS Trusted Advisor and Azure Security Center to identify potential vulnerabilities. I implemented multi-layered security measures including AWS Shield for DDoS protection, Azure Key Vault for encryption key management, and AWS IAM for robust access control. Additionally, I set up disaster recovery solutions using AWS Elastic Disaster Recovery and Azure Site Recovery.
Result:
The migration proceeded without incident, with zero data breaches or compliance violations. The robust risk management framework reduced the likelihood of service outages by 40%, and the company achieved a 99.99% uptime during the first year of operation post-migration.
14. Customer-Centric Design
Situation:
A global e-commerce platform wanted to enhance its customer experience by reducing page load times and improving the overall responsiveness of its website, especially for users in regions with slower internet connections.
Task:
I was tasked with redesigning the platform’s architecture to prioritize customer-centric performance improvements.
Action:
I led the implementation of a content delivery network (CDN) using AWS CloudFront and Azure CDN to cache content closer to end users. Additionally, I optimized the front-end code using Google Lighthouse to reduce load times and implemented Lambda@Edge functions to dynamically optimize content based on user location.
Result:
Page load times were reduced by 50% globally, with a 70% reduction in regions with slower internet speeds. This resulted in a 25% increase in user engagement and a 15% boost in conversion rates, directly enhancing customer satisfaction and driving revenue growth.
15. Regulatory Compliance
Situation:
A European bank needed to migrate its customer data to the cloud while ensuring full compliance with GDPR and other local regulatory requirements.
Task:
I was assigned to design a cloud architecture that would ensure regulatory compliance across multiple jurisdictions.
Action:
I developed a data residency strategy using AWS Regions and Azure Data Residency features to ensure that data was stored within the appropriate geographical boundaries. I also implemented AWS Config and Azure Policy to continuously enforce compliance with GDPR and local regulations. Encryption of data at rest and in transit was managed through AWS KMS and Azure Key Vault.
Result:
The bank successfully migrated its data to the cloud while achieving 100% compliance with GDPR and other local regulations. The solution passed all regulatory audits with zero findings, and the bank avoided potential fines and legal issues, preserving its reputation and trust with customers.