chat1
import duckdb
import pandas as pd
def pandas_dtype_to_sql(dtype):
if pd.api.types.is_integer_dtype(dtype):
return 'INT'
elif pd.api.types.is_float_dtype(dtype):
return 'DOUBLE'
elif pd.api.types.is_bool_dtype(dtype):
return 'BOOLEAN'
elif pd.api.types.is_datetime64_any_dtype(dtype):
return 'TIMESTAMP'
elif pd.api.types.is_string_dtype(dtype):
return 'VARCHAR'
else:
return 'VARCHAR' # Default type if unknown
def create_and_populate_table(df, db_name, table_name, unique_columns_list):
# Connect to DuckDB
con = duckdb.connect(db_name)
# Check if the table exists
table_exists = con.execute(f"SELECT count(*) FROM information_schema.tables WHERE table_name = '{table_name}'").fetchone()[0] > 0
if not table_exists:
# Creating column definitions based on DataFrame dtypes
column_defs = ', '.join([f"{col} {pandas_dtype_to_sql(df[col].dtype)}" for col in df.columns])
# Creating UNIQUE constraints string
unique_str = f"UNIQUE({', '.join(unique_columns_list)})" if unique_columns_list else ""
# Combine column definitions and constraints
sql_create_table = f"CREATE TABLE {table_name} ({column_defs} {unique_str})".rstrip(", ")
try:
con.execute(sql_create_table)
print(f"Table '{table_name}' created successfully.")
except Exception as e:
print(f"Error creating table: {e}")
return
# Register the DataFrame with DuckDB
con.register(table_name, df)
# Insert data into the table
try:
con.execute(f"INSERT INTO {table_name} SELECT * FROM {table_name}")
print("Data inserted into the table.")
except Exception as e:
print(f"Error inserting data: {e}")
# Fetch and display the table contents to verify
result_df = con.execute(f"SELECT * FROM {table_name}").df()
print(result_df)
# Close the connection
con.close()
# Example usage:
data = {
'id': [1, 2, 3],
'name': ['Alice', 'Bob', 'Charlie'],
'age': [25, 30, 35]
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'my_table', ['