chat1
import duckdb
import pandas as pd
def pandas_dtype_to_sql(dtype):
"""Maps pandas data types to SQL data types compatible with DuckDB."""
if pd.api.types.is_integer_dtype(dtype):
return 'INTEGER'
elif pd.api.types.is_float_dtype(dtype):
return 'DOUBLE'
elif pd.api.types.is_bool_dtype(dtype):
return 'BOOLEAN'
elif pd.api.types.is_datetime64_any_dtype(dtype):
return 'TIMESTAMP' # Use TIMESTAMP for both datetime and time-only
elif pd.api.types.is_string_dtype(dtype) or pd.api.types.is_object_dtype(dtype):
return 'VARCHAR'
else:
return 'VARCHAR' # Default type if unknown
def create_and_populate_table(df, db_name, table_name, unique_columns_list):
"""Creates and populates a DuckDB table from a pandas DataFrame.
Args:
df (pandas.DataFrame): DataFrame to store in DuckDB.
db_name (str): Path to the DuckDB database file.
table_name (str): Name of the table to create.
unique_columns_list (list): List of column names to have UNIQUE constraints.
"""
# Connect to DuckDB
con = duckdb.connect(database=db_name)
# Check if the table exists
table_exists = con.execute(f"SELECT count(*) FROM information_schema.tables WHERE table_name = '{table_name}'").fetchone()[0] > 0
if not table_exists:
# Creating column definitions based on DataFrame dtypes
column_defs = ', '.join([
f"{col} {pandas_dtype_to_sql(df[col].dtype)}"
for col in df.columns
])
# Add UNIQUE constraints if any
unique_constraints = ', '.join([f"UNIQUE({col})" for col in unique_columns_list if col in df.columns])
constraints_section = f", {unique_constraints}" if unique_constraints else ""
# Construct the CREATE TABLE SQL command
sql_create_table = f"CREATE TABLE {table_name} ({column_defs}{constraints_section})"
try:
con.execute(sql_create_table)
print(f"Table '{table_name}' created successfully with specified constraints.")
except Exception as e:
print(f"Error creating table: {e}")
return
# Register the DataFrame with DuckDB
con.register(table_name, df)
# Insert data into the table
try:
con.execute(f"INSERT INTO {table_name} SELECT * FROM {table_name}")
print("Data inserted into the table.")
except Exception as e:
print(f"Error inserting data: {e}")
# Fetch and display the table contents to verify
result_df = con.execute(f"SELECT * FROM {table_name}").df()
print(result_df)
# Close the connection
con.close()
# Example usage:
data = {
'id': [1, 2, 3],
'name': ['Alice', 'Bob', 'Charlie'],
'age': [25, 30, 35]
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'my_table', ['id'])