# Polars: classify per-second timestamp buckets as ONLINE / OFFLINE
# (reconstructed from a garbled chat transcript paste)

import polars as pl

# Classify each row of `df` as "OFFLINE" or "ONLINE".
#
# Assumes `df` is defined earlier (outside this chunk) with a column
# 'timestamp_bucket' — timestamps already truncated to second precision
# (per the original author's note). TODO(review): confirm that schema.
#
# A second with >= 20 events starts an "offline" window covering that
# second plus the 30 subsequent seconds; every timestamp bucket falling
# inside any such window is labelled OFFLINE, everything else ONLINE.
#
# NOTE: uses the current polars expression API (group_by / with_columns /
# unique / pl.datetime_ranges); requires a reasonably recent polars.

# Step 1: count entries per second-bucket.
count_per_second = df.group_by("timestamp_bucket").agg(
    pl.len().alias("count")
)

# Step 2: buckets with 20 or more events mark the start of an offline window.
offline_starts = count_per_second.filter(pl.col("count") >= 20).select(
    "timestamp_bucket"
)

# Step 3: expand each start into the 30 subsequent seconds.
# pl.arange cannot generate datetimes, so build an explicit 1-second
# datetime range per start and explode it into one row per second.
offline_periods = offline_starts.with_columns(
    pl.datetime_ranges(
        pl.col("timestamp_bucket"),
        pl.col("timestamp_bucket") + pl.duration(seconds=30),
        interval="1s",
    ).alias("offline_range")
).explode("offline_range")

# Step 4: overlapping windows produce duplicate seconds — dedupe and sort.
offline_periods = (
    offline_periods.select("offline_range").unique().sort("offline_range")
)

# Step 5: left-join onto the original frame; rows whose bucket falls in an
# offline window pick up status == "OFFLINE", all others get null.
df = df.join(
    offline_periods.with_columns(pl.lit("OFFLINE").alias("status")),
    left_on="timestamp_bucket",
    right_on="offline_range",
    how="left",
)

# Step 6: fill the non-offline (null) rows with "ONLINE".
df = df.with_columns(
    pl.when(pl.col("status").is_null())
    .then(pl.lit("ONLINE"))
    .otherwise(pl.col("status"))
    .alias("status")
)

print(df)