# Polars: classify per-second timestamp buckets as ONLINE / OFFLINE
# (reconstructed from a garbled chat transcript paste)

import polars as pl

# Classify each row of `df` as "OFFLINE" or "ONLINE".
#
# Assumes `df` is defined earlier (outside this chunk) with a column
# 'timestamp_bucket' — timestamps already truncated to second precision
# (per the original author's note). TODO(review): confirm that schema.
#
# A second with >= 20 events starts an "offline" window covering that
# second plus the 30 subsequent seconds; every timestamp bucket falling
# inside any such window is labelled OFFLINE, everything else ONLINE.
#
# NOTE: uses the current polars expression API (group_by / with_columns /
# unique / pl.datetime_ranges); requires a reasonably recent polars.

# Step 1: count entries per second-bucket.
count_per_second = df.group_by("timestamp_bucket").agg(
    pl.len().alias("count")
)

# Step 2: buckets with 20 or more events mark the start of an offline window.
offline_starts = count_per_second.filter(pl.col("count") >= 20).select(
    "timestamp_bucket"
)

# Step 3: expand each start into the 30 subsequent seconds.
# pl.arange cannot generate datetimes, so build an explicit 1-second
# datetime range per start and explode it into one row per second.
offline_periods = offline_starts.with_columns(
    pl.datetime_ranges(
        pl.col("timestamp_bucket"),
        pl.col("timestamp_bucket") + pl.duration(seconds=30),
        interval="1s",
    ).alias("offline_range")
).explode("offline_range")

# Step 4: overlapping windows produce duplicate seconds — dedupe and sort.
offline_periods = (
    offline_periods.select("offline_range").unique().sort("offline_range")
)

# Step 5: left-join onto the original frame; rows whose bucket falls in an
# offline window pick up status == "OFFLINE", all others get null.
df = df.join(
    offline_periods.with_columns(pl.lit("OFFLINE").alias("status")),
    left_on="timestamp_bucket",
    right_on="offline_range",
    how="left",
)

# Step 6: fill the non-offline (null) rows with "ONLINE".
df = df.with_columns(
    pl.when(pl.col("status").is_null())
    .then(pl.lit("ONLINE"))
    .otherwise(pl.col("status"))
    .alias("status")
)

print(df)