chatterbox

chat1

import pandas as pd
import itertools

def generate_combinations(input_file, output_file):
    """Write every cross-column combination of a CSV's values to a new CSV.

    Each column of ``input_file`` is reduced to its distinct non-empty
    values (in order of first appearance), the Cartesian product of those
    per-column value sets is taken, and the result is written to
    ``output_file`` with the original header and one combination per row.

    Args:
        input_file: Path (or anything ``pandas.read_csv`` accepts) of the
            source CSV. Columns may have different numbers of filled cells.
        output_file: Destination path for the generated CSV.

    Returns:
        None. The function writes ``output_file`` and prints a
        confirmation line.
    """
    # Read every cell as text. With pandas' default type inference, a
    # numeric column that contains blank cells is parsed as float (blank
    # becomes NaN), so an input value "1" would be written back as "1.0".
    # Reading as str keeps each value exactly as it appears in the input.
    df = pd.read_csv(input_file, dtype=str)

    # Drop rows in which every column is empty.
    df = df.dropna(how='all')

    # Distinct non-empty entries of each column, in column order.
    columns_data = [df[col].dropna().unique() for col in df.columns]

    # All possible combinations across columns (first column varies slowest).
    combinations = list(itertools.product(*columns_data))

    # Write the combinations under the original header, without an index.
    output_df = pd.DataFrame(combinations, columns=df.columns)
    output_df.to_csv(output_file, index=False)
    print(f"Combinations have been saved to {output_file}")

# Usage example: reads 'input.csv' and writes the generated combinations to
# 'output.csv' (both paths are relative to the current working directory).
# NOTE: this call sits at module level, so it runs whenever the file is
# executed or imported.
generate_combinations('input.csv', 'output.csv')

chat1

import polars as pl

def flag_offline_periods(df, *, entry_threshold=20, offline_seconds=30):
    """Return ``df`` with a ``status`` column of ``'OFFLINE'`` / ``'ONLINE'``.

    A bucket that holds more than ``entry_threshold`` rows starts an offline
    period covering ``[bucket, bucket + offline_seconds)``. Every row whose
    ``timestamp_bucket`` falls inside at least one such period is flagged
    ``'OFFLINE'``; all other rows, including rows with a null bucket, are
    flagged ``'ONLINE'``.

    Args:
        df: Eager polars DataFrame with a ``timestamp_bucket`` column.
            NOTE(review): assumes the column has a Datetime dtype, so its
            values convert to Python ``datetime`` objects -- confirm.
        entry_threshold: A bucket triggers an offline period when its row
            count is strictly greater than this value.
        offline_seconds: Length of each offline period in seconds.

    Returns:
        The DataFrame produced by ``df.with_columns`` with the added
        string column ``status``.

    NOTE(review): written against the current polars API (``group_by`` and
    ``pl.len``); older releases spelled these ``groupby`` / ``pl.count``.
    """
    # Function-scope imports keep this block self-contained (stdlib only).
    from bisect import bisect_right
    from datetime import timedelta

    # A plain timedelta is used instead of pl.duration(): pl.duration()
    # builds a polars expression, and adding it to a Python value yields an
    # expression that cannot be compared or truth-tested in Python code.
    window = timedelta(seconds=offline_seconds)

    # One (bucket, row count) pair per distinct bucket.
    counts = df.group_by('timestamp_bucket').agg(pl.len().alias('count'))
    per_bucket = counts.select(['timestamp_bucket', 'count']).rows()

    # Sorted start times of all offline periods (null buckets are ignored).
    offline_starts = sorted(
        bucket
        for bucket, n_rows in per_bucket
        if bucket is not None and n_rows > entry_threshold
    )

    def is_offline(timestamp):
        # All periods have the same length, so the latest period starting
        # at or before ``timestamp`` is also the one that ends last; checking
        # it alone is equivalent to checking the union of all periods.
        pos = bisect_right(offline_starts, timestamp)
        return pos > 0 and timestamp < offline_starts[pos - 1] + window

    # Decide the status once per distinct bucket, then look it up per row.
    status_by_bucket = {
        bucket: 'OFFLINE' if is_offline(bucket) else 'ONLINE'
        for bucket, _ in per_bucket
        if bucket is not None
    }
    statuses = [
        status_by_bucket.get(bucket, 'ONLINE')
        for bucket in df['timestamp_bucket'].to_list()
    ]

    # An explicit dtype keeps the column a string column for an empty frame.
    return df.with_columns(pl.Series('status', statuses, dtype=pl.Utf8))

# Usage example: replaces `df` with a copy that carries the 'status' column.
# NOTE(review): `df` is not defined anywhere in the visible source; it must
# be created beforehand as a polars DataFrame with a 'timestamp_bucket'
# column, otherwise this line raises NameError.
df = flag_offline_periods(df)