Skip to main content

chat1

import duckdb
import pandas as pd

def pandas_dtype_to_sql(dtype):
    if pd.api.types.is_integer_dtype(dtype):
        return 'INT'
    elif pd.api.types.is_float_dtype(dtype):
        return 'DOUBLE'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    elif pd.api.types.is_string_dtype(dtype):
        return 'VARCHAR'
    else:
        return 'VARCHAR'  # Default type if unknown

def create_and_populate_table(df, db_name, table_name, unique_columns_list):
    # Connect to DuckDB
    con = duckdb.connect(db_name)

    # Check if the table exists
    table_exists = con.execute(f"SELECT count(*) FROM information_schema.tables WHERE table_name = '{table_name}'").fetchone()[0] > 0

    if not table_exists:
        # Creating column definitions based on DataFrame dtypes
        column_defs = ', '.join([f"{col} {pandas_dtype_to_sql(df[col].dtype)}" for col in df.columns])
        
        # Creating UNIQUE constraints string
        unique_str = f"UNIQUE({', '.join(unique_columns_list)})" if unique_columns_list else ""
        
        # Combine column definitions and constraints
        sql_create_table = f"CREATE TABLE {table_name} ({column_defs} {unique_str})".rstrip(", ")
        try:
            con.execute(sql_create_table)
            print(f"Table '{table_name}' created successfully.")
        except Exception as e:
            print(f"Error creating table: {e}")
            return

    # Register the DataFrame with DuckDB
    con.register(table_name, df)

    # Insert data into the table
    try:
        con.execute(f"INSERT INTO {table_name} SELECT * FROM {table_name}")
        print("Data inserted into the table.")
    except Exception as e:
        print(f"Error inserting data: {e}")

    # Fetch and display the table contents to verify
    result_df = con.execute(f"SELECT * FROM {table_name}").df()
    print(result_df)

    # Close the connection
    con.close()

# Example usage:
data = {
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35]
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'my_table', ['