# chat1
import duckdb
import pandas as pd
def pandas_dtype_to_sql(dtype):
    """Map a pandas dtype to a SQL column type compatible with DuckDB.

    Args:
        dtype: A pandas/NumPy dtype, or anything else accepted by the
            ``pd.api.types.is_*_dtype`` predicates.

    Returns:
        str: 'UBIGINT', 'BIGINT', 'DOUBLE', 'BOOLEAN', 'TIMESTAMP' or
        'VARCHAR'. Anything unrecognised (including string and object
        dtypes) falls back to 'VARCHAR'.
    """
    dtype_checks = pd.api.types

    # pandas integers are 64-bit by default, while DuckDB's INTEGER is only
    # 32-bit; use the 64-bit types so large values do not overflow on insert.
    if dtype_checks.is_unsigned_integer_dtype(dtype):
        return 'UBIGINT'
    if dtype_checks.is_integer_dtype(dtype):
        return 'BIGINT'
    if dtype_checks.is_float_dtype(dtype):
        return 'DOUBLE'
    if dtype_checks.is_bool_dtype(dtype):
        return 'BOOLEAN'
    if dtype_checks.is_datetime64_any_dtype(dtype):
        # TIMESTAMP is used for every datetime64 flavour.
        return 'TIMESTAMP'
    # String, object and any unknown dtype are stored as text.
    return 'VARCHAR'
def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, escaping embedded quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def create_and_populate_table(df, db_name, table_name, unique_columns_list):
    """Create a DuckDB table for a DataFrame (if missing) and insert its rows.

    When the table does not exist yet it is created with one column per
    DataFrame column (types chosen by ``pandas_dtype_to_sql``) and, if
    requested, a composite primary key. The rows of ``df`` are then inserted
    and the table contents are printed for verification.

    Args:
        df (pandas.DataFrame): DataFrame to store in DuckDB.
        db_name (str): Path to the DuckDB database file.
        table_name (str): Name of the table to create. It is interpolated
            into the SQL statements as-is, so it must be a trusted value.
        unique_columns_list (list): Column names that form a composite
            primary key. May be empty or None for a table without a key.

    Returns:
        None. Errors raised while creating the table or inserting rows are
        printed rather than propagated.
    """
    con = duckdb.connect(database=db_name)
    try:
        # Bind the table name as a parameter instead of formatting it into
        # the lookup query.
        table_exists = con.execute(
            "SELECT count(*) FROM information_schema.tables WHERE table_name = ?",
            [table_name],
        ).fetchone()[0] > 0

        if not table_exists:
            # One "name TYPE" clause per DataFrame column.
            clauses = [
                f"{_quote_identifier(col)} {pandas_dtype_to_sql(df[col].dtype)}"
                for col in df.columns
            ]
            # Only add the key clause when key columns were given, so the
            # statement never ends in a dangling comma.
            if unique_columns_list:
                key_columns = ', '.join(
                    _quote_identifier(col) for col in unique_columns_list
                )
                clauses.append(f"PRIMARY KEY ({key_columns})")

            sql_create_table = f"CREATE TABLE {table_name} ({', '.join(clauses)})"
            try:
                con.execute(sql_create_table)
            except Exception as e:
                print(f"Error creating table: {e}")
                return  # the ``finally`` below still closes the connection
            print(f"Table '{table_name}' created successfully with a composite primary key.")

        # Expose the DataFrame to SQL under the view name ``df``.
        con.register('df', df)

        try:
            con.execute(f"INSERT INTO {table_name} SELECT * FROM df")
        except Exception as e:
            print(f"Error inserting data: {e}")
        else:
            print("Data inserted into the table.")

        # Fetch and display the table contents to verify.
        result_df = con.execute(f"SELECT * FROM {table_name}").df()
        print(result_df)
    finally:
        con.close()


def store_dataframe_to_duckdb(df, db_path, table_name):
    """Store a DataFrame in a DuckDB table, letting DuckDB infer the schema.

    The table is created from the DataFrame only if it does not exist yet;
    an existing table is left untouched. To append rows to an existing table,
    run ``INSERT INTO <table> SELECT * FROM df`` against the registered view
    instead.

    Args:
        df (pandas.DataFrame): DataFrame to store in DuckDB.
        db_path (str): Path to the DuckDB database file.
        table_name (str): Name of the table to create. It is interpolated
            into the SQL statement as-is, so it must be a trusted value.
    """
    con = duckdb.connect(database=db_path)
    try:
        # Register the frame under an explicit view name so the statement
        # does not rely on binding a DataFrame to a ``?`` placeholder.
        con.register('df', df)
        con.execute(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM df")
        # Commit changes before the connection is closed.
        con.commit()
    finally:
        con.close()
# Example usage: explicit column types with a composite primary key.
data = {
    'team_id': [1, 1, 2],
    'person_id': [101, 102, 101],
    'role': ['Coach', 'Player', 'Coach'],
}
df = pd.DataFrame(data)
create_and_populate_table(df, 'my_duckdb.duckdb', 'team_members', ['team_id', 'person_id'])

# Example usage: schema inferred by DuckDB from the DataFrame.
df = pd.DataFrame({
    'id': [1, 2, 3],
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
})
store_dataframe_to_duckdb(df, 'my_database.duckdb', 'people')