polars: build a DataFrame from a large list in chunks

import polars as pl

chunk_size = 1000  # Adjust based on available memory
dataframes = []

for i in range(0, len(a_list), chunk_size):
    chunk = a_list[i:i + chunk_size]
    # orient="row" makes it explicit that each tuple in the chunk is one row
    dataframes.append(pl.DataFrame(chunk, schema=["stsdf", "abc"], orient="row"))

combined_df = pl.concat(dataframes)
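
A quick way to exercise the loop above, with made-up data standing in for a_list (the contents and the "stsdf"/"abc" column names are taken from the snippet, not from any known schema):

import polars as pl

# Hypothetical input: 2,500 (str, int) rows, enough to force three chunks
a_list = [(f"id_{i}", i) for i in range(2500)]

chunk_size = 1000
dataframes = [
    pl.DataFrame(a_list[i:i + chunk_size], schema=["stsdf", "abc"], orient="row")
    for i in range(0, len(a_list), chunk_size)
]

combined_df = pl.concat(dataframes)
print(combined_df.shape)  # (2500, 2)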

polars: flag offline periods from bucketed timestamps

import polars as pl
from datetime import timedelta

def flag_offline_periods(df):
    # Count entries per bucket
    counts = df.group_by('timestamp_bucket').agg(pl.len().alias('count'))
    
    # Identify buckets with more than 20 entries
    high_count_buckets = counts.filter(pl.col('count') > 20)['timestamp_bucket']
    
    # Create a function to generate offline periods
    def generate_offline_periods(high_count_buckets):
        offline_periods = []
        for bucket in high_count_buckets:
            # Iterating the Series yields Python datetimes; pl.duration builds
            # an expression, not a scalar, so use timedelta for the offset
            offline_end = bucket + timedelta(seconds=30)
            offline_periods.append((bucket, offline_end))
        return offline_periods

    # Generate offline periods
    offline_periods = generate_offline_periods(high_count_buckets)
    
    # Merge overlapping periods
    merged_periods = []
    for start, end in sorted(offline_periods):
        if merged_periods and start <= merged_periods[-1][1]:
            merged_periods[-1] = (merged_periods[-1][0], max(merged_periods[-1][1], end))
        else:
            merged_periods.append((start, end))
    
    # Create a function to check if a timestamp is within offline periods
    def is_offline(timestamp):
        return any(start <= timestamp < end for start, end in merged_periods)
    
    # Apply the offline check to each row
    df = df.with_columns(
        pl.when(pl.col('timestamp_bucket').map_elements(is_offline, return_dtype=pl.Boolean))
        .then(pl.lit('OFFLINE'))
        .otherwise(pl.lit('ONLINE'))
        .alias('status')
    )
    
    return df

# Assuming your DataFrame is called 'df'
df = flag_offline_periods(df)
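
A minimal smoke test for the function, assuming timestamp_bucket holds datetimes (the data below is invented):

from datetime import datetime, timedelta
import polars as pl

# 25 events in one bucket (over the >20 threshold) and 1 in a later bucket
base = datetime(2025, 2, 9, 13, 45)
sample_df = pl.DataFrame({
    'timestamp_bucket': [base] * 25 + [base + timedelta(minutes=5)]
})

flagged = flag_offline_periods(sample_df)
print(flagged['status'].value_counts())  # expect 25 OFFLINE, 1 ONLINE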

awk: group log lines by date, hour, symbol, and source

#!/usr/bin/env bash

# Usage: ./group_by_date_hour_symbol_source.sh path_to_logfile

LOGFILE="$1"

# Refuse to run without a logfile argument
if [ -z "$LOGFILE" ]; then
  echo "Usage: $0 path_to_logfile" >&2
  exit 1
fi

awk '
  {
    # Split the first field (20250209-13:45:02.123456) on the dash.
    # dt[1] will hold "20250209", dt[2] will hold "13:45:02.123456".
    split($1, dt, "-")
    date_str = dt[1]  # e.g. "20250209"

    # Now split dt[2] ("13:45:02.123456") on the colon to get the hour.
    split(dt[2], time_parts, ":")
    hour = time_parts[1]  # e.g. "13"

    symbol = $2   # e.g. "USDCAD"
    source = $3   # e.g. "MyLiquidityProvider"

    # Increment a count in an associative array, keyed by date, hour, symbol, source.
    counts[date_str, hour, symbol, source]++
  }
  END {
    # Print header (optional)
    # print "DATE", "HOUR", "SYMBOL", "SOURCE", "COUNT"

    # Loop over all keys in counts.
    for (key in counts) {
      # The key format is "date_str SUBSEP hour SUBSEP symbol SUBSEP source".
      split(key, parts, SUBSEP)
      date_str = parts[1]
      hour     = parts[2]
      symbol   = parts[3]
      source   = parts[4]
      count    = counts[key]

      print date_str, hour, symbol, source, count
    }
  }
' "$LOGFILE" | sort

Windows Terminal: open a tab/pane layout from a batch file

@echo off
start wt new-tab --profile "Command Prompt" --title "Program" cmd.exe /k "program.exe" ^
; split-pane --horizontal cmd.exe /k "stuff.exe" ^
; split-pane --vertical cmd.exe /k "morestuff.exe" ^
; new-tab --profile "Git Bash" --title "UAT Servers" bash.exe -c "uat1 && connectthis && ls" ^
; split-pane --horizontal bash.exe -c "uat2 && connectthis && ls" ^
; split-pane --vertical bash.exe -c "uat3 && connectthis && ls"

@echo off
start wt new-tab --profile "Command Prompt" --title "Program" --tabColor "#0C0C52" cmd.exe /k "program.exe" ^
; split-pane --horizontal cmd.exe /k "stuff.exe" ^
; split-pane --vertical cmd.exe /k "morestuff.exe" ^
; new-tab --profile "Git Bash" --title "UAT Servers" --tabColor "#8B4513" bash.exe -c "uat1 && connectthis && ls" ^
; split-pane --horizontal bash.exe -c "uat2 && connectthis && ls" ^
; split-pane --vertical bash.exe -c "uat3 && connectthis && ls"

bash + q: run a hardcoded kdb+ query and export the result to CSV

#!/bin/bash
#
# This script loads all tables in /stuff/kdb, runs a hardcoded multi-line Q-syntax query,
# and writes the results into /home/stuff/output with a timestamped filename.

# Move to the kdb directory
cd /stuff/kdb || {
  echo "Error: Failed to cd into /stuff/kdb. Check that it exists."
  exit 1
}

# Hardcoded multi-line Q/SQL-like query
# (Adjust the actual code to your use case)
read -r -d '' MULTI_LINE_SQL <<'EQUERY'
select from trades
 where sym in `AAPL`MSFT,
       time within 09:30 16:00
EQUERY

# Generate a timestamp
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")

# Hardcoded output directory; create it if it is not already there
OUTPUT_DIR="/home/stuff/output"
mkdir -p "$OUTPUT_DIR"

# Construct the output filename (e.g., output_20250310_143042.csv)
OUTPUT_CSV="${OUTPUT_DIR}/output_${TIMESTAMP}.csv"

# Run q with a 1GB workspace limit (-w is in MB).
# Note: q system commands such as \l must start at column one, and indented
# lines are treated as continuations, so the heredoc body is not indented.
q -w 1000 <<EOF
\l .
results: $MULTI_LINE_SQL
/ csv 0: renders the table as CSV text; the file-handle symbol 0: writes it
(\`$":$OUTPUT_CSV") 0: csv 0: results
/ exit q
\\\\
EOF

echo "Query results saved to: $OUTPUT_CSV"