# chat1
import polars as pl

# Classify every row of `df` as "OFFLINE" or "ONLINE".
#
# Assuming df is your original DataFrame with 'timestamp' already truncated to
# second precision and named 'timestamp_bucket'. `df` must already be in scope.
#
# A second that contains OFFLINE_THRESHOLD or more rows starts an offline
# window of OFFLINE_WINDOW_SECONDS seconds; every row whose bucket falls in any
# such window is labelled "OFFLINE", all other rows "ONLINE".
#
# NOTE(review): assumes 'timestamp_bucket' is a polars Datetime column (not
# integer epoch seconds) -- confirm against the code that builds `df`.
# NOTE(review): written against the current polars API (`group_by`,
# `with_columns`, `pl.len`); the removed `groupby` / `with_column` spellings
# are intentionally not used.

OFFLINE_THRESHOLD = 20        # rows within one second that trigger a window
OFFLINE_WINDOW_SECONDS = 30   # length of each offline window, in seconds

# Step 1: Count entries per second
count_per_second = df.group_by("timestamp_bucket").agg(pl.len().alias("count"))

# Step 2: Find the buckets where count is 20 or more
# Null buckets are dropped: a missing timestamp cannot anchor a time window.
offline_starts = count_per_second.filter(
    (pl.col("count") >= OFFLINE_THRESHOLD)
    & pl.col("timestamp_bucket").is_not_null()
).select("timestamp_bucket")

# Step 3: Generate a range of 30 subsequent seconds for each start point
# Each start row gets a list [start, start + 1s, ..., start + 29s], which
# `explode` then turns into one row per second.
# NOTE(review): the window is taken to include the triggering second itself;
# shift both bounds by one second if it should begin on the following second.
offline_periods = offline_starts.with_columns(
    pl.datetime_ranges(
        pl.col("timestamp_bucket"),
        pl.col("timestamp_bucket")
        + pl.duration(seconds=OFFLINE_WINDOW_SECONDS - 1),
        interval="1s",
    ).alias("offline_range")
).explode("offline_range")

# Step 4: Remove duplicates in case of overlapping windows
# Only 'offline_range' is kept so the join below adds nothing but 'status',
# and it is cast to the exact dtype of the join key on the left side.
offline_periods = offline_periods.select(
    pl.col("offline_range").cast(df.schema["timestamp_bucket"])
).unique()

# Step 5: Join this with the original dataframe to classify each timestamp
# Left join: rows outside every offline window get a null 'status'.
df = df.join(
    offline_periods.with_columns(pl.lit("OFFLINE").alias("status")),
    left_on="timestamp_bucket",
    right_on="offline_range",
    how="left",
)

# Step 6: Fill non-offline periods with "ONLINE"
# `pl.lit` is required: a bare string in `then` is read as a column name.
# `otherwise` keeps the "OFFLINE" labels instead of nulling them out.
df = df.with_columns(
    pl.when(pl.col("status").is_null())
    .then(pl.lit("ONLINE"))
    .otherwise(pl.col("status"))
    .alias("status")
)
print(df)