Skip to main content

chat1

import polars as pl

# Sample input -- swap in your own timestamps. Each string is parsed into a
# Datetime and floored to the start of its second, giving the bucket key
# used by the later steps.
sample_timestamps = [
    "2024-04-19 07:08:13.959",
    "2024-04-19 07:08:13.970",
    "2024-04-19 07:08:17.823",
    # Add more timestamps as needed
]
second_bucket = (
    pl.col("timestamp")
    .str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S.%f")
    .dt.truncate("1s")
    .alias("timestamp_bucket")
)
df = pl.DataFrame({"timestamp": sample_timestamps}).with_columns([second_bucket])

# Step 1: Count entries per second
# `group_by` and `pl.len()` are the current Polars spellings of the older
# `groupby` / `pl.count()` row-count idiom.
count_per_second = df.group_by("timestamp_bucket").agg(pl.len().alias("count"))

# Step 2: Identify buckets with 20 or more entries
high_traffic_buckets = count_per_second.filter(pl.col("count") >= 20)

# Step 3: Create the list of all buckets to be marked as OFFLINE for the next 30 seconds
def generate_offline_buckets(row, window_seconds=30):
    """Return the one-second buckets covered by an offline window.

    Args:
        row: Mapping with a ``timestamp_bucket`` entry holding a Python
            ``datetime`` (for example a row from ``iter_rows(named=True)``).
        window_seconds: Number of seconds after the bucket that are also
            part of the window.

    Returns:
        A list of ``window_seconds + 1`` datetimes: the bucket itself
        followed by each subsequent second, up to and including
        ``window_seconds`` seconds later.
    """
    # Imported locally so the helper does not rely on a module-level import.
    from datetime import timedelta

    base_time = row["timestamp_bucket"]
    # timedelta yields concrete timestamps; pl.duration() would instead
    # build a lazy Polars expression that cannot be used as a plain value.
    return [base_time + timedelta(seconds=i) for i in range(window_seconds + 1)]

# Expand every high-traffic bucket into the 31 one-second buckets it takes
# offline (the bucket itself plus the following 30 seconds). A cross join
# against the offsets 0..30 pairs each bucket with each offset; unique()
# then collapses buckets covered by more than one overlapping window.
offline_buckets = (
    high_traffic_buckets
    .select("timestamp_bucket")
    .join(pl.DataFrame({"offset_seconds": list(range(31))}), how="cross")
    .select(
        (
            pl.col("timestamp_bucket")
            + pl.duration(seconds=pl.col("offset_seconds"))
        ).alias("offline_bucket")
    )
    .unique()
)

# Step 4: Attach an "OFFLINE" status to every row whose bucket appears in
# the offline list. The join is a left join, so rows without a matching
# bucket are kept and get a null status.
offline_status = offline_buckets.with_columns([pl.lit("OFFLINE").alias("status")])
df = df.join(
    offline_status,
    how="left",
    left_on="timestamp_bucket",
    right_on="offline_bucket",
)

# Step 5: Fill non-offline periods with "ONLINE"
# The label is wrapped in pl.lit() so Polars treats it as a string value
# rather than as a reference to a column named "ONLINE".
df = df.with_columns(
    pl.col("status").fill_null(pl.lit("ONLINE")).alias("status")
)

print(df)