Wednesday, March 5, 2025

Text-to-SQL Agent

To See All Articles About Technology: Index of Lessons in Technology

Download Code

from langchain_google_genai import ChatGoogleGenerativeAI

import json
with open('../api_keys.json', mode = 'r') as f:
    api_keys = json.load(f)
    
# Initialize the client with your API key
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    google_api_key=api_keys['ashishjain1547']
)

from langgraph.graph import StateGraph, END


from typing import TypedDict
class AgentState(TypedDict):
    nlq: str
    metadata: dict
    refined_query: str
    further_refined_query: str
    feedback: str
    itr_count: int

import re
def extract_triple_quoted_json(response_text):
    # This pattern looks for a JSON object (starting with '{' and ending with '}')
    # enclosed in either triple double-quotes or triple single-quotes.
    pattern = r'(?:\'\'\'|""")\s*(\{.*?\})\s*(?:\'\'\'|""")'
    match = re.search(pattern, response_text, re.DOTALL)
    if match:
        return match.group(1)
    return None
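As a quick sanity check, here is how this helper behaves on a made-up response string (the sample text below is purely illustrative):

```python
# Hypothetical model response with the JSON wrapped in triple double-quotes
sample_response = 'Here is the verdict:\n"""\n{"answer": "yes"}\n"""'
print(extract_triple_quoted_json(sample_response))
# Expected output: {"answer": "yes"}
```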


class ReEngineerQuery:
    def __init__(self, model):
        self.model = model
        
        graph = StateGraph(AgentState)

        graph.add_node("refine_query", self.refine_query)
        graph.add_node("evaluate_reengineered_query", self.evaluate_reengineered_query)
        graph.add_node("further_refine_query", self.further_refine_query)

        # --- Edges ---
        graph.set_entry_point("refine_query")
        graph.add_edge("refine_query", "evaluate_reengineered_query")
        graph.add_edge("evaluate_reengineered_query", "further_refine_query")
        graph.add_conditional_edges(
            "further_refine_query",
            self.should_continue,
            {"end": END, "refine_query": "refine_query"}
        )

        # Compile the graph and store references
        self.graph = graph.compile()

    def refine_query(self, state):
        META_PROMPT_TO_REENGINEER_NLQ = """
You are an expert SQLite query generator. Based on the natural language query and the provided table metadata,
please reengineer the query to clearly specify:
- The specific table(s) that should be referenced,
- The calculations or aggregations to perform,
- The structure of the final SQL query.

NLQ: {nlq}

Table Metadata:
{metadata}

Reengineered Query:
"""

        # Build the meta-prompt by substituting the NLQ and metadata.
        prompt = META_PROMPT_TO_REENGINEER_NLQ.format(nlq=state.get("nlq", ""), metadata=state.get("metadata", ""))
        # Invoke the LLM with the prompt.
        response = self.model(prompt)
        # Return the refined query and bump the iteration counter.
        return {"refined_query": response.strip(), "itr_count": state.get("itr_count", 0) + 1}
    
    def evaluate_reengineered_query(self, state):
        EVALUATE_REENGINEERED_QUERY = """
You are an expert SQLite engineer grading a reengineered NLQ for correctness, completeness and clarity. \
Generate a critique and recommendations so the NLQ can be easily converted to SQL. \
Please evaluate the reengineered query below:
{refined_query}
"""

        prompt = EVALUATE_REENGINEERED_QUERY.format(refined_query=state.get("refined_query", ""))
        response = self.model(prompt)
        return {"feedback": response.strip()}
    
    def further_refine_query(self, state):
        REENGINEER_QUERY_POST_FEEDBACK = """
You are an expert SQLite query generator. Based on the natural language query, the provided table metadata and the feedback,
please reengineer the query, incorporating the feedback, to clearly specify:
- The specific table(s) that should be referenced,
- The calculations or aggregations to perform,
- The structure of the final SQL query.

NLQ: {nlq}

Table Metadata:
{metadata}

Feedback:
{feedback}

Reengineered Query:
"""     # Build the meta-prompt by substituting the NLQ and metadata.
        prompt = REENGINEER_QUERY_POST_FEEDBACK.format(nlq=state.get("nlq", ""), metadata=state.get("metadata", ""),
                                                        feedback=state.get("feedback", ""))
        # Invoke the LLM with the prompt.
        response = self.model(prompt)

        print(response)


        return {"further_refined_query": response.strip(), "itr_count": state.get("itr_count") + 1}
    
    def should_continue(self, state):
        CHECK_CONSISTENCY = """
You are an expert database query evaluator. Your task is to evaluate whether two queries \
are consistent and mean the same thing. One query is the last query and the other is the reengineered query. \

Last Query: {refined_query}
Reengineered Query: {further_refined_query}

Is the reengineered query consistent with the last query? 
Return a JSON response with the key 'answer': 'yes' or 'no'."""
        
        prompt = CHECK_CONSISTENCY.format(refined_query=state.get("refined_query", ""), further_refined_query=state.get("further_refined_query", ""))
        response = self.model(prompt)

        # Extract the JSON object from the response: take the text between the first
        # '{' and the next '}', convert single quotes to double quotes, then parse it.
        print(response)

        json_response = "{" + response.split("{")[1].split("}")[0].strip().replace("'", '"') + "}"
        json_response = json.loads(json_response)
        print(json_response)

        if json_response['answer'] == "yes":
            return "end"
        return "refine_query"

class MyGeminiChatModel:
    """
    Minimal wrapper that takes a prompt string and returns the Gemini response text.
    """
    def __init__(self, api_key):
        self.client = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key
        )

    def bind_tools(self, tools):
        return self  # No-op, kept for compatibility with agents that call .bind_tools

    def __call__(self, prompt: str) -> str:
        response = self.client.invoke(prompt)
        return response.content

model = MyGeminiChatModel(api_keys["ashishjain1547"])

app = ReEngineerQuery(model)



from IPython.display import Image
Image(app.graph.get_graph().draw_png())
(Output: a diagram of the compiled graph, showing refine_query → evaluate_reengineered_query → further_refine_query, with a conditional edge back to refine_query or on to END.)

with open('tables.json', mode = 'r') as f:
    metadata = json.load(f)
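The contents of tables.json are not reproduced in this post (see the Download Code link); the prompts only need a description of the tables and their columns, so a minimal, hypothetical stand-in could look like the sketch below (names and types are invented for illustration, loosely mirroring the AB_ORDERS schema used in the examples that follow):

```python
# Hypothetical stand-in for tables.json -- the downloadable code contains the real schema.
metadata = {
    "AB_ORDERS": {
        "description": "One row per customer order",
        "columns": {"ORDERID": "INTEGER", "CUSTOMERID": "INTEGER", "ORDERYEAR": "INTEGER"}
    },
    "AB_ORDERDETAILS": {
        "description": "Line items belonging to each order",
        "columns": {"ORDERID": "INTEGER", "PRODUCTID": "INTEGER", "QUANTITY": "INTEGER"}
    }
}
```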

nlq = "Show me the orders from last year."
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})


SELECT * FROM AB_ORDERS WHERE ORDERYEAR = CAST(STRFTIME('%Y', DATE('now', '-1 year')) AS INTEGER)

nlq = "Show all tables"
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})

To show all tables in the SQLite database, you can use the following SQL query:

```sql
SELECT name
FROM sqlite_master
WHERE type='table';
```

This query retrieves the names of all tables present in the SQLite database by querying the `sqlite_master` table where the `type` column is equal to 'table'. Knowing all the tables in the database is essential for database management, schema understanding, and querying purposes. It helps in identifying the available data structures, relationships between tables, and overall database organization. This information is crucial for developers, analysts, and administrators to effectively work with the database and perform various operations such as data retrieval, manipulation, and optimization.
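To actually run that query from Python against a local SQLite file, a minimal snippet (with an illustrative database file name) would be:

```python
import sqlite3

conn = sqlite3.connect("my_database.db")  # illustrative file name
rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
print([name for (name,) in rows])  # list of table names
conn.close()
```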

nlq = "What's in orders table?"
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})

SELECT * FROM AB_ORDERS

nlq = "Show me top 5 categories with respect to orders."
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})

SELECT C.CATEGORYNAME, COUNT(O.ORDERID) AS ORDER_COUNT
FROM AB_CATEGORIES C
JOIN AB_PRODUCTS P ON C.CATEGORYID = P.CATEGORYID
JOIN AB_ORDERDETAILS OD ON P.PRODUCTID = OD.PRODUCTID
JOIN AB_ORDERS O ON OD.ORDERID = O.ORDERID
GROUP BY C.CATEGORYNAME
ORDER BY ORDER_COUNT DESC
LIMIT 5;

nlq = "Which areas are dairy products sold?"
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})

SELECT DISTINCT C.CITY AS AREA
FROM AB_CUSTOMERS C
JOIN AB_ORDERS O ON C.CUSTOMERID = O.CUSTOMERID
JOIN AB_ORDERDETAILS OD ON O.ORDERID = OD.ORDERID
JOIN AB_PRODUCTS P ON OD.PRODUCTID = P.PRODUCTID
JOIN AB_CATEGORIES CAT ON P.CATEGORYID = CAT.CATEGORYID
WHERE CAT.CATEGORYNAME = 'Dairy Products';

nlq = "Compare orders from top two cities with respect to total sales."
refined_query = ""
further_refined_query = ""
feedback = ""
itr_count = 0

result = app.graph.invoke({"nlq": nlq, "metadata": metadata, "refined_query": refined_query, "further_refined_query": further_refined_query, "feedback": feedback, "itr_count": itr_count})

-- Query to compare total sales from the top two cities
SELECT 
    c.CITY AS City,
    SUM(p.PRICE * od.QUANTITY) AS TotalSales
FROM 
    AB_CUSTOMERS c
JOIN 
    AB_ORDERS o ON c.CUSTOMERID = o.CUSTOMERID
JOIN 
    AB_ORDERDETAILS od ON o.ORDERID = od.ORDERID
JOIN 
    AB_PRODUCTS p ON od.PRODUCTID = p.PRODUCTID
GROUP BY 
    c.CITY
ORDER BY 
    TotalSales DESC
LIMIT 2;
Tags: Generative AI, Artificial Intelligence, Technology, Database

Top Indian IT firms reduce bench time amid uncertain times; average duration drops to 35-45 days

To See All Articles About Layoffs: Layoffs Reports
The average bench time at present has come down to 35-45 days compared to an average of 45-60 days in FY20 and FY21, when the sector’s revenue growth was in the higher double digits.

Top Indian IT services companies, including Tata Consultancy Services (TCS), Infosys, Wipro, HCLTech, and Accenture, among others, have been reducing bench sizes in the past year and a half in a bid to defend margins and improve utilisation rates as revenue growth remains slow.

Staffing firms and industry experts said that not just bench sizes, even bench holding timelines have plunged significantly. Benching in the IT services industry refers to employees on the payroll who haven’t been deployed on any active projects. They are usually kept as a backup in case sudden client demand arises.

According to data sourced from market intelligence firm UnearthInsight, the average bench time at present has come down to 35-45 days compared to an average of 45-60 days in FY20 and FY21, when the sector’s revenue growth was in the higher double digits. This trend is expected to continue in FY26 too.

Currently, even employees with nine to 14 years of experience in legacy skills are at risk of bench layoffs, as niche skills related to artificial intelligence, machine learning and cloud are more in demand.

Meanwhile, benched employees, who accounted for 10-15% of IT companies’ average overall headcount mix, have now come down to just 2-5%, according to data from staffing firm TeamLease Digital.

Kamal Karanth, co-founder of specialised staffing firm Xpheno, explained that the high bench volumes in calendar year 2022 and early 2023 were an outcome of the hyper-hiring in 2021 and early 2022, resulting in lower utilisation rates.

“The resizing and rebalancing of headcounts since 2023, amidst revenue and margin pressures hit the bench volumes first to move the utilisation rates up again. Enterprises have since gone for a mix of staffing consumption for just-in-time workforce and subcon arrangements for longer tenures,” he told Moneycontrol.

Krishna Vij, business head-IT staffing at TeamLease Digital, said, “From 70-75% utilisation, companies have started reaching 80-85% utilisation rates. The attrition has also reduced to 11-13% from 28-30%. When you are not losing people, you will not be utilising the benched resources. With GCCs hiring directly from the talent pool, IT firms have started facing increased competition. So they started opting for leaner and project-specific hiring models.”

While current utilisation rates stayed in the optimal mid-to-high 80 percent range for IT firms, estimated bench sizes have shrunk by 15 percent compared to a year ago. On a two-year basis, the estimated bench size reduction is nearly 22 percent, according to data from Xpheno.

Select Tier-I firms such as TCS do maintain a slightly higher lateral bench to respond to clients with faster or immediate deployment. However, in times of slowdown, when deal closures are getting delayed, IT services firms have to optimise costs, according to Gaurav Vasu, founder and CEO, UnearthInsight.

He added, “Around 2-3 months is the current bench policy for laterals but Tier I firms like TCS, Infosys, Wipro, HCL, Accenture are looking at faster deployment to projects from bench hence bench optimization is a normal activity specially for skills not in demand or skills where demand visibility is weak.”

This is reflected in the negative quarterly headcount additions of most of the top five Indian IT companies, even as the sector overall has started hiring more than in the previous fiscal, seeing some green shoots in the demand environment.

IT companies, on the other hand, have started highlighting a fundamental shift in the traditional tech services business model and the need to overhaul it, as AI has started driving productivity gains for customers, bringing down project tenures and sizes.

Last week, HCLTech CEO and MD C Vijayakumar said, “The business model is ripe for disruption; what we saw in the last 30 years was a fairly linear scaling of IT services. I think the time is already out for that model, and in the last couple of years we have been challenging our teams on how you can deliver twice the revenue with half the people; that’s really the ask I have put to a lot of my teams.”

He was speaking at the Nasscom Technology and Leadership Forum (NTLF) 2025 in Mumbai.

Distribution of talent

Not just demand cycles and AI disruption, bench layoff trends are also a function of where the delivery centres are located, said Vasu.

He shared that managing a bench in smaller cities, both globally and in India, has been difficult, as projects focused on niche skills often don’t find many takers in Tier-II cities. “IT companies try to take fungible skills or vanilla skills to Tier-II cities and global low-cost cities as both the time and cost of bench could directly impact EBIT negatively. Currently 0% to 0.25% of the headcount of a Tier-II city based campus or delivery centre will be on bench across vanilla (legacy skills) and niche skills,” he said.

Another staffing firm business lead, seeking anonymity, said that certain IT majors, especially Tier-I firms, don’t want to keep any bench. They are even passing the bench pressure on to staffing firms.

“They don’t have to invest on the bench, but staffing firms have to bear the costs to retain the engagement with the client IT firm. Depending on how the projects come, staffing firms will then deploy the candidates. Until then, the benched candidates will be on staffing firms’ payrolls. That’s one way of doing it,” the person said.

Ref

Layoffs in the US - How to protect your career as Meta and the federal government announce cuts

To See All Articles About Layoffs: Layoffs Reports
With major companies like Meta and federal departments announcing significant layoffs, workers across the US are facing growing uncertainty. As companies adjust their workforce to respond to economic shifts, professionals must take proactive steps to secure their careers and protect their futures. According to experts, strategic planning now can prevent hasty decisions later, giving workers the best chance to navigate potential job cuts successfully.

As reported by Forbes, layoffs are not confined to just tech giants like Meta but are also impacting government agencies. This broad wave of cuts means it's critical for employees to consider their options early and ensure they're prepared for what's next, whether that means transitioning, relocating, or even starting a new career path.

Evaluate your career goals and long-term plans

Before making any decisions about staying or leaving, it’s important to think about your career goals. Financial advisors suggest assessing what kind of role you want if layoffs become a reality. Would you prefer to stay at the company if a buyout offer is presented, or would you consider consulting or moving to a new opportunity? Even if layoffs are not immediately looming, taking time to envision your long-term goals can spark interest in pursuing something new. Perhaps becoming a digital nomad or launching your own business could be a more appealing option.

Prepare a thorough handover plan for a smooth transition

If layoffs are inevitable, creating a detailed handover plan is critical to maintaining professional relationships and ensuring a smooth exit. As Forbes highlights, it's essential to document key processes and responsibilities that your successor could pick up. This might include creating an FAQ guide for common tasks or identifying potential colleagues who might be suitable replacements for your role. Not only will this improve your departure experience, but it could also benefit your reputation, leaving a positive impression with your employer and colleagues. Additionally, remember to handle your personal transition. Updating personal contacts, ensuring your work accounts are separated from your personal life, and saving any work samples or projects you might need in the future are crucial steps for a seamless shift, whether you leave voluntarily or involuntarily.

Align with your manager on your departure narrative

In times of layoffs, managing the narrative of your departure is important. Align with your manager on how they will describe your exit, especially if only a few employees are affected. Whether it’s framed as a strategic move or due to economic pressures, having a consistent story is vital for your future job search. If you’re leaving voluntarily, it’s important to communicate why in a way that reflects positively on your career. Are you making a move for better opportunities, or are there frustrations that you want to express? How you frame your exit can play a significant role in how you’re perceived by future employers.

Start preparing now to stay ahead of job market competition

The job market often becomes highly competitive when layoffs occur, flooding the market with candidates. It's crucial to start your job search and career planning before the competition intensifies. Even if you're not ready to start applying, taking steps such as updating your resume, refining your LinkedIn profile, and reconnecting with your professional network can put you ahead of the curve. By understanding which industries and roles align with your skills, you'll be better prepared when the time comes to make a move. Early preparation ensures you're ready to navigate the job market, which might be more competitive than ever following a wave of layoffs.

First offers might be the best offers in times of layoffs

If layoffs are already happening, the initial round of severance offers often includes better benefits, such as larger severance pay, more support for job placement, and greater flexibility in transition logistics. For those who decide to stay with their current employer, it's essential to be aware that further layoffs could be on the horizon, making it crucial to assess whether it's better to leave early for a more generous package. With layoffs affecting multiple sectors, including both private companies like Meta and government agencies, employees must be proactive in securing their careers. By planning ahead and considering all options, workers can navigate these uncertain times with confidence, ensuring that they remain in control of their professional future. Ref

Day 3 - Issue of "database locked" with SQLite

To See All Articles About Technology: Index of Lessons in Technology

How to resolve: sqlite3.OperationalError: database is locked

The "database is locked" error in SQLite occurs when a database file is accessed by multiple processes or connections simultaneously, causing contention. Here’s how to resolve it:


🔍 Possible Causes & Fixes

1. Ensure Proper Connection Closing

If you're using Python, ensure that the database connection is closed properly after executing queries.

python
import sqlite3

conn = sqlite3.connect("my_database.db")
cursor = conn.cursor()

# Execute your queries
cursor.execute("SELECT * FROM my_table;")

# Commit and close the connection
conn.commit()
conn.close()  # ✅ Always close the connection

👉 Fix: Always call conn.close() after executing queries.


2. Check for Unclosed Transactions

If a transaction (like INSERT, UPDATE, or DELETE) is left open without COMMIT, SQLite keeps the database locked.

sql
COMMIT; -- Ensures any open transaction is completed

👉 Fix: Run COMMIT; in your SQL tool or application.
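In Python, one way to avoid leaving a transaction open is to use the connection as a context manager, which commits on success and rolls back on error (a minimal sketch; the file, table and column names are illustrative):

```python
import sqlite3

conn = sqlite3.connect("my_database.db")  # illustrative file name

# The `with conn:` block wraps the statements in a transaction:
# it commits automatically on success and rolls back if an exception is raised.
with conn:
    conn.execute("UPDATE my_table SET status = ? WHERE id = ?", ("done", 1))

conn.close()  # the context manager does not close the connection, so close it explicitly
```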


3. Close Other Processes Using the Database

Run the following SQL command in your SQLite session:

sql
PRAGMA busy_timeout = 5000; -- Waits up to 5 seconds before failing

👉 Fix:

  • If another process is using the DB file, close it (e.g., close DB Browser for SQLite, Python scripts, or any other app accessing the file).
  • Restart your SQLite application.
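When connecting from Python, the equivalent knob is the timeout argument of sqlite3.connect (in seconds), which controls how long the driver waits for a lock to clear before raising the "database is locked" error. A sketch, with an illustrative file name:

```python
import sqlite3

# Wait up to 10 seconds for a lock held by another connection to clear
# before raising sqlite3.OperationalError: database is locked
conn = sqlite3.connect("my_database.db", timeout=10)
```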

4. Delete the .db-journal or .db-wal File

If SQLite crashed or was improperly closed, a journal file may still be locking the database.

  1. Navigate to your database folder
  2. Delete files ending in:
    • my_database.db-journal
    • my_database.db-wal

👉 Fix: Remove these files safely if no other process is using the database.
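If you prefer to do this from Python, a cautious sketch is below; it only touches the two leftover files named above and assumes you have already confirmed (e.g. with lsof, as in fix 6) that no process still has the database open:

```python
import os

db_path = "my_database.db"  # illustrative path

# Remove stale journal files ONLY after confirming nothing has the database open.
for suffix in ("-journal", "-wal"):
    stale_file = db_path + suffix
    if os.path.exists(stale_file):
        print("Removing", stale_file)
        os.remove(stale_file)
```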


5. Open SQLite in Read-Only Mode

If another process is holding a write lock, try opening the DB in read-only mode:

python
conn = sqlite3.connect("file:my_database.db?mode=ro", uri=True)

👉 Fix: Use mode=ro if you don’t need to write to the database.


6. Kill Hanging Processes (Windows/Linux)

On Windows, find processes locking the file:

sh
tasklist | findstr sqlite

Kill a process using:

sh
taskkill /F /PID <process_id>

On Linux/macOS, find processes:

sh
lsof my_database.db

Kill the process:

sh
kill -9 <process_id>

👉 Fix: Restart any process holding the database.


🚀 Best Practices to Avoid "Database Locked" Issues

Use PRAGMA busy_timeout:

sql
PRAGMA busy_timeout = 5000; -- Waits 5 sec before failing

Use a Single Database Connection
Avoid opening multiple connections in the same script.

Close the Connection Properly
Always call conn.close() after database operations.

Use WAL Mode for Better Concurrency
Enable Write-Ahead Logging (WAL) for better handling of multiple connections:

sql
PRAGMA journal_mode = WAL;
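From Python, WAL mode can be enabled with a one-off pragma; the setting is stored in the database file itself, so it persists across connections (sketch with an illustrative file name):

```python
import sqlite3

conn = sqlite3.connect("my_database.db")  # illustrative file name

# Switch the database to Write-Ahead Logging; the pragma returns the new mode.
mode = conn.execute("PRAGMA journal_mode=WAL;").fetchone()[0]
print("journal_mode is now:", mode)  # expected: 'wal'

conn.close()
```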

Try these fixes, and let me know if the issue persists! 🚀

... ... ... ... ... ... ... ... ... ... ... ...

print the pid for "ps aux | grep sqlite"

To print only the PID of processes related to SQLite using ps aux | grep sqlite, use this command:

sh
ps aux | grep sqlite | awk '{print $2}'

Explanation:

  • ps aux → Lists all running processes.
  • grep sqlite → Filters processes containing "sqlite".
  • awk '{print $2}' → Extracts and prints only the PID (which is in the second column).

Alternative (More Precise)

If you want to exclude the grep command itself from the results:

sh
ps aux | grep '[s]qlite' | awk '{print $2}'

Example Output:

1234
5678

These are the PIDs of processes running SQLite.

👉 To kill a process, use:

sh
kill -9 <PID>

Let me know if you need more help! 🚀

... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

ps -ef what does it do?

The ps -ef command in Linux is used to display a detailed list of all running processes on the system.


🔍 Breakdown of ps -ef

  • ps → Shows active processes.
  • -e → Displays all processes (not just those owned by the current user).
  • -f → Provides full format output (more details).

📌 Example Output

sh
UID        PID   PPID  C STIME TTY      TIME     CMD
root         1      0  0 Feb24 ?        00:01:23 /sbin/init
user1     1050      1  0 10:30 ?        00:00:00 /usr/bin/gnome-session
user1     2080   1050  0 10:31 ?        00:00:01 /usr/bin/firefox

📌 Column Explanation

  • UID: User who started the process
  • PID: Process ID
  • PPID: Parent Process ID (the process that started this one)
  • C: CPU usage percentage
  • STIME: Process start time
  • TTY: Terminal associated with the process (if any)
  • TIME: CPU time used
  • CMD: Command that started the process

🔍 Useful Variations

✅ Show only processes of a specific user:

sh
ps -ef | grep username

✅ Find a specific process (e.g., SQLite):

sh
ps -ef | grep sqlite

✅ Show hierarchical (parent-child) relationships:

sh
ps -ef --forest

Let me know if you need more details! 🚀😊