Skip to main content

chat1

import duckdb
import pandas as pd

def pandas_dtype_to_sql(dtype):
    """Maps pandas data types to SQL data types compatible with DuckDB."""
    if pd.api.types.is_integer_dtype(dtype):
        return 'INTEGER'
    elif pd.api.types.is_float_dtype(dtype):
        return 'DOUBLE'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'  # Use TIMESTAMP for both datetime and time-only
    elif pd.api.types.is_string_dtype(dtype) or pd.api.types.is_object_dtype(dtype):
        return 'VARCHAR'
    else:
        return 'VARCHAR'  # Default type if unknown

def create_and_populate_table(df, db_name, table_name, unique_columns_list):
    """Creates and populates a DuckDB table from a pandas DataFrame.
    Args:
        df (pandas.DataFrame): DataFrame to store in DuckDB.
        db_name (str): Path to the DuckDB database file.
        table_name (str): Name of the table to create.
        unique_columns_list (list): List of column names to have UNIQUE constraints.
    """
    # Connect to DuckDB
    con = duckdb.connect(database=db_name)

    # Check if the table exists
    table_exists = con.execute(f"SELECT count(*) FROM information_schema.tables WHERE table_name = '{table_name}'").fetchone()[0] > 0

    if not table_exists:
        # Creating column definitions based on DataFrame dtypes
        column_defs = ', '.join([
            f"{col} {pandas_dtype_to_sql(df[col].dtype)}"
            for col in df.columns
        ])
        
        # Add UNIQUE constraints if any
        unique_constraints = ', '.join([f"UNIQUE({col})" for col in unique_columns_list if col in df.columns])
        constraints_section = f", {unique_constraints}" if unique_constraints else ""
        
        # Construct the CREATE TABLE SQL command
        sql_create_table = f"CREATE TABLE {table_name} ({column_defs}{constraints_section})"
        try:
            con.execute(sql_create_table)
            print(f"Table '{table_name}' created successfully with specified constraints.")
        except Exception as e:
            print(f"Error creating table: {e}")
            return

    # Register the DataFrame with DuckDB
    con.register(table_name, df)

    # Insert data into the table
    try:
        con.execute(f"INSERT INTO {table_name} SELECT * FROM {table_name}")
        print("Data inserted into the table.")
    except Exception as e:
        print(f"Error inserting data: {e}")

    # Fetch and display the table contents to verify
    result_df = con.execute(f"SELECT * FROM {table_name}").df()
    print(result_df)

    # Close the connection
    con.close()

# Example usage:
data = {
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35]
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'my_table', ['id'])