# chat1
from datetime import timedelta

import polars as pl
# Example setup: adjust the sample timestamps to match your own data.
# Each timestamp string is parsed to a Datetime and floored to the start of
# its second, so all events within the same second share one
# "timestamp_bucket" value.
df = pl.DataFrame(
    {
        "timestamp": [
            "2024-04-19 07:08:13.959",
            "2024-04-19 07:08:13.970",
            "2024-04-19 07:08:17.823",
            # Add more timestamps as needed
        ]
    }
).with_columns(
    pl.col("timestamp")
    # Polars uses chrono format specifiers: `%.f` consumes the dot and the
    # fractional-second digits, whereas plain `%f` is an integer nanosecond
    # count, so ".959" would not be read as 959 ms.
    .str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S%.f")
    .dt.truncate("1s")
    .alias("timestamp_bucket")
)

# Minimum number of entries within one second for that second to count as
# high traffic.
HIGH_TRAFFIC_THRESHOLD = 20

# Step 1: Count entries per second.
# `group_by` and `pl.len()` are the current names; `groupby` and `pl.count()`
# are deprecated or removed in recent Polars releases.
count_per_second = df.group_by("timestamp_bucket").agg(pl.len().alias("count"))

# Step 2: Identify buckets with HIGH_TRAFFIC_THRESHOLD or more entries.
high_traffic_buckets = count_per_second.filter(
    pl.col("count") >= HIGH_TRAFFIC_THRESHOLD
)
# Step 3: Create the list of all buckets to be marked as OFFLINE for the next 30 seconds
def generate_offline_buckets(row, window_seconds=30):
    """Return every one-second bucket that a high-traffic row takes offline.

    Args:
        row: Mapping with a ``"timestamp_bucket"`` entry holding a
            ``datetime`` (for example a row yielded by
            ``DataFrame.iter_rows(named=True)``).
        window_seconds: How many seconds after the bucket stay offline.
            Defaults to 30.

    Returns:
        A list of ``window_seconds + 1`` datetimes: the bucket itself
        followed by each of the next ``window_seconds`` seconds.
    """
    base_time = row["timestamp_bucket"]
    # Use timedelta rather than pl.duration: the row value is a concrete
    # datetime, and pl.duration builds a Polars expression, not a value.
    return [
        base_time + timedelta(seconds=offset)
        for offset in range(window_seconds + 1)
    ]
# Number of seconds after a high-traffic bucket that remain OFFLINE.
OFFLINE_WINDOW_SECONDS = 30

# Create the timestamps for each high traffic bucket: pair every such bucket
# with every offset 0..OFFLINE_WINDOW_SECONDS via a cross join, add the
# offset as a duration, and drop duplicates produced by overlapping windows.
second_offsets = pl.DataFrame(
    {"offset_seconds": list(range(OFFLINE_WINDOW_SECONDS + 1))}
)
offline_buckets = (
    high_traffic_buckets.select("timestamp_bucket")
    .join(second_offsets, how="cross")
    .select(
        (
            pl.col("timestamp_bucket")
            + pl.duration(seconds=pl.col("offset_seconds"))
        ).alias("offline_bucket")
    )
    .unique()
)

# Step 4: Join this with the original dataframe to classify each timestamp.
# Rows whose bucket falls inside an offline window get status "OFFLINE";
# all other rows get a null status from the left join.
df = df.join(
    offline_buckets.with_columns(pl.lit("OFFLINE").alias("status")),
    left_on="timestamp_bucket",
    right_on="offline_bucket",
    how="left",
)

# Step 5: Fill non-offline periods with "ONLINE".
# pl.lit is required so the string is a literal value, not a column name.
df = df.with_columns(pl.col("status").fill_null(pl.lit("ONLINE")))

print(df)