# Skip to main content  (navigation text apparently left over from the web page this snippet was copied from)


from bisect import bisect_right
from collections import Counter
from datetime import timedelta

import polars as pl

def _merge_periods(periods):
    """Merge overlapping or touching ``(start, end)`` periods.

    Args:
        periods: Iterable of ``(start, end)`` pairs, in any order.

    Returns:
        A list of non-overlapping ``(start, end)`` tuples sorted by start.
    """
    merged = []
    # Sorting first is what makes the single forward pass below correct:
    # each period only ever needs to be compared with the last merged one.
    for start, end in sorted(periods):
        if merged and start <= merged[-1][1]:
            last_start, last_end = merged[-1]
            merged[-1] = (last_start, max(last_end, end))
        else:
            merged.append((start, end))
    return merged


def flag_offline_periods(df, *, threshold=20, offline_seconds=30):
    """Flag rows that fall inside an OFFLINE window triggered by a burst.

    Timestamps are truncated to one-second buckets. Every bucket holding at
    least ``threshold`` rows opens an OFFLINE window covering the half-open
    interval ``[bucket, bucket + offline_seconds)``. Overlapping windows are
    merged, and each row is classified by whether its bucket lies inside one.

    Args:
        df: DataFrame with a ``"timestamp"`` column. A string column is
            parsed as ``YYYY-MM-DD HH:MM:SS[.fraction]``; a column that is
            already a datetime is used as-is.
        threshold: Minimum number of rows in a one-second bucket that
            triggers an OFFLINE window.
        offline_seconds: Length of each OFFLINE window, in seconds.

    Returns:
        ``df`` with two added columns: ``"timestamp_bucket"`` (the timestamp
        truncated to the second) and ``"status"`` (``"OFFLINE"`` or
        ``"ONLINE"``).
    """
    ts = pl.col("timestamp")
    if df.schema["timestamp"] == pl.Utf8:
        # "%.f" is the chrono specifier for a dotted fractional second; a
        # literal "." followed by "%f" would read the digits as an integer
        # number of nanoseconds instead.
        ts = ts.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S%.f")
    df = df.with_columns(ts.dt.truncate("1s").alias("timestamp_bucket"))

    # Step 1: count entries per one-second bucket (null buckets are ignored).
    buckets = df["timestamp_bucket"].to_list()
    counts = Counter(bucket for bucket in buckets if bucket is not None)

    # Step 2 + 3: every high-traffic bucket opens an OFFLINE window; windows
    # that overlap or touch are collapsed into one.
    window = timedelta(seconds=offline_seconds)
    merged_periods = _merge_periods(
        (bucket, bucket + window)
        for bucket, count in counts.items()
        if count >= threshold
    )
    period_starts = [start for start, _ in merged_periods]

    def is_offline(bucket):
        """Return True if ``bucket`` lies inside any merged OFFLINE period."""
        if bucket is None:
            return False
        # Merged periods are sorted and disjoint, so only the last period
        # starting at or before ``bucket`` can contain it.
        idx = bisect_right(period_starts, bucket) - 1
        return idx >= 0 and bucket < merged_periods[idx][1]

    # Step 4 + 5: classify each row; anything outside a window is ONLINE.
    # NOTE(review): the output column name "status" is a choice made here;
    # rename it if downstream code expects something else.
    statuses = ["OFFLINE" if is_offline(bucket) else "ONLINE" for bucket in buckets]
    return df.with_columns(pl.Series("status", statuses, dtype=pl.Utf8))


# Example setup: adjust according to your DataFrame
df = pl.DataFrame({
    "timestamp": [
        "2024-04-19 07:08:13.959",
        "2024-04-19 07:08:13.970",
        "2024-04-19 07:08:17.823",
        # Add more timestamps as needed
    ],
})

df = flag_offline_periods(df)
print(df)