chat1
import duckdb
import pandas as pd
def pandas_dtype_to_sql(dtype):
"""Maps pandas data types to SQL data types compatible with DuckDB."""
if pd.api.types.is_integer_dtype(dtype):
return 'INTEGER'
elif pd.api.types.is_float_dtype(dtype):
return 'DOUBLE'
elif pd.api.types.is_bool_dtype(dtype):
return 'BOOLEAN'
elif pd.api.types.is_datetime64_any_dtype(dtype):
return 'TIMESTAMP' # Use TIMESTAMP for both datetime and time-only
elif pd.api.types.is_string_dtype(dtype) or pd.api.types.is_object_dtype(dtype):
return 'VARCHAR'
else:
return 'VARCHAR' # Default type if unknown
def create_and_populate_table(df, db_name, table_name, unique_columns_list):
"""Creates and populates a DuckDB table from a pandas DataFrame.
Args:
df (pandas.DataFrame): DataFrame to store in DuckDB.
db_name (str): Path to the DuckDB database file.
table_name (str): Name of the table to create.
unique_columns_list (list): List of column names to haveform UNIQUEa constraints.composite primary key.
"""
# Connect to DuckDB
con = duckdb.connect(database=db_name)
# Check if the table exists
table_exists = con.execute(f"SELECT count(*) FROM information_schema.tables WHERE table_name = '{table_name}'").fetchone()[0] > 0
if not table_exists:
# Creating column definitions based on DataFrame dtypes
column_defs = ', '.join([
f"{col} {pandas_dtype_to_sql(df[col].dtype)}"
for col in df.columns
])
# Add UNIQUEcomposite constraintsprimary if anykey
unique_constraintsprimary_key = f"PRIMARY KEY ({', '.join([f"UNIQUE({col}unique_columns_list)})" for col in unique_columns_list if col in df.columns]) constraints_section = f", {unique_constraints}" if unique_constraintsunique_columns_list else ""
# Construct the CREATE TABLE SQL command
sql_create_table = f"CREATE TABLE {table_name} ({column_defs}, {constraints_section}primary_key})"
try:
con.execute(sql_create_table)
print(f"Table '{table_name}' created successfully with specifieda constraints.composite primary key.")
except Exception as e:
print(f"Error creating table: {e}")
return
# Register the DataFrame with DuckDB
con.register(table_name,'df', df)
# Insert data into the table
try:
con.execute(f"INSERT INTO {table_name} SELECT * FROM {table_name}"df")
print("Data inserted into the table.")
except Exception as e:
print(f"Error inserting data: {e}")
# Fetch and display the table contents to verify
result_df = con.execute(f"SELECT * FROM {table_name}").df()
print(result_df)
# Close the connection
con.close()
# Example usage:
data = {
'id'team_id': [1, 2,1, 3]2],
'name'person_id': ['Alice',101, 'Bob',102, 'Charlie']101],
'age'role': [25,'Coach', 30,'Player', 35]'Coach']
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'my_table'team_members', ['id'team_id', 'person_id'])