part1 fixes
This commit is contained in:
@@ -2,3 +2,5 @@
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
|
cache/
|
||||||
|
*.parquet
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ requires-python = ">=3.14"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"aiohttp>=3.14.1",
|
"aiohttp>=3.14.1",
|
||||||
"altair>=6.2.1",
|
"altair>=6.2.1",
|
||||||
"binance>=0.3.110",
|
|
||||||
"cryptography>=49.0.0",
|
"cryptography>=49.0.0",
|
||||||
"jupyter>=1.1.1",
|
"jupyter>=1.1.1",
|
||||||
"matplotlib>=3.10.9",
|
"matplotlib>=3.10.9",
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
from typing import List
|
||||||
|
import requests
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
import polars as pl
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
import research
|
||||||
|
|
||||||
|
# Maker fee constant. Presumably a fractional rate of traded notional
# (0.000450 would be 0.045%) — TODO confirm units and the fee tier it reflects.
MAKER_FEE = 0.000450
|
||||||
|
# Taker fee constant. Presumably a fractional rate of traded notional
# (0.000450 would be 0.045%) — TODO confirm units and the fee tier it reflects.
TAKER_FEE = 0.000450
|
||||||
|
|
||||||
|
def download_and_unzip(symbol: str, date: str | datetime,
                       download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Download one day of Binance futures trades and return it as a DataFrame.

    The daily zip archive is fetched from data.binance.vision, extracted into
    ``download_dir``, parsed with Polars and stored in ``cache_dir`` as a
    parquet file. When the parquet file for the requested symbol and date
    already exists it is read and returned without any network access.

    Args:
        symbol: Symbol exactly as it appears in the archive URL and file name.
        date: Day to fetch, as a ``datetime`` or a ``'YYYY-MM-DD'`` string.
        download_dir: Scratch directory for the temporary zip and csv files.
        cache_dir: Directory that holds the cached parquet files.

    Returns:
        The parsed trades with the columns ``id``, ``price``, ``qty``,
        ``quoteQty``, ``time`` and ``isBuyerMaker``, plus a ``datetime``
        column derived from ``time`` (interpreted as epoch milliseconds).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # Normalize date to string
    date_str = date.strftime('%Y-%m-%d') if isinstance(date, datetime) else date

    cache_dir = Path(cache_dir)
    # parents=True so nested cache locations work on first use.
    cache_dir.mkdir(parents=True, exist_ok=True)
    cache_path = cache_dir / f"{symbol}-trades-{date_str}.parquet"

    if cache_path.exists():
        return pl.read_parquet(cache_path)

    url = f"https://data.binance.vision/data/futures/um/daily/trades/{symbol}/{symbol}-trades-{date_str}.zip"

    download_dir = Path(download_dir)
    download_dir.mkdir(parents=True, exist_ok=True)
    zip_path = download_dir / f"{symbol}-trades-{date_str}.zip"
    csv_path = download_dir / f"{symbol}-trades-{date_str}.csv"
    # The cache is written under a temporary name and renamed afterwards, so
    # an interrupted write can never be mistaken for a valid cache entry.
    tmp_cache_path = cache_path.with_name(cache_path.name + ".tmp")

    try:
        # Download zip. The timeout (connect, read) keeps a stalled connection
        # from blocking forever; the context manager releases the connection.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Extract
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(download_dir)

        # Load into Polars
        df = pl.read_csv(
            csv_path,
            schema={
                "id": pl.Int64,
                "price": pl.Float64,
                "qty": pl.Float64,
                "quoteQty": pl.Float64,
                "time": pl.Int64,
                "isBuyerMaker": pl.Boolean,
            }
        ).with_columns(
            pl.from_epoch("time", time_unit="ms").alias("datetime")
        )

        # Cache
        df.write_parquet(tmp_cache_path)
        tmp_cache_path.replace(cache_path)
    finally:
        # Clean up scratch files whether or not the download succeeded.
        zip_path.unlink(missing_ok=True)
        csv_path.unlink(missing_ok=True)
        tmp_cache_path.unlink(missing_ok=True)

    return df
|
||||||
|
|
||||||
|
|
||||||
|
def download_date_range(symbol: str, start_date: str | datetime, end_date: str | datetime,
                        download_dir: str = "data", cache_dir: str = "cache",
                        return_trades: bool = False) -> list[pl.DataFrame] | None:
    """
    Download trade data for a range of dates with a progress bar.

    Every day from ``start_date`` to ``end_date`` (both inclusive) is passed
    to ``download_and_unzip``. A failure on one day is reported through
    ``tqdm.write`` and the loop continues with the next day.

    Args:
        symbol: Symbol forwarded to ``download_and_unzip``.
        start_date: First day, as a ``datetime`` or ``'YYYY-MM-DD'`` string.
        end_date: Last day, as a ``datetime`` or ``'YYYY-MM-DD'`` string.
        download_dir: Scratch directory forwarded to ``download_and_unzip``.
        cache_dir: Cache directory forwarded to ``download_and_unzip``.
        return_trades: When true, keep each day's DataFrame and return them.
            The default only runs the downloads and does not keep the frames
            in memory.

    Returns:
        A list with one DataFrame per successfully downloaded day when
        ``return_trades`` is true, otherwise ``None``.
    """
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, '%Y-%m-%d')
    if isinstance(end_date, str):
        end_date = datetime.strptime(end_date, '%Y-%m-%d')

    # Inclusive range; an end date before the start date yields no iterations.
    num_days = (end_date - start_date).days + 1

    frames: list[pl.DataFrame] = []
    for i in tqdm(range(num_days), desc=f"Downloading {symbol}"):
        current_date = start_date + timedelta(days=i)
        try:
            if return_trades:
                frames.append(download_and_unzip(symbol, current_date, download_dir, cache_dir))
            else:
                download_and_unzip(symbol, current_date, download_dir, cache_dir)
        except Exception as e:
            # Best effort: one missing day must not abort the whole range.
            tqdm.write(f"[ERROR] {symbol} {current_date.date()}: {e}")

    return frames if return_trades else None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def download_trades(symbol: str, no_days: int,
                    download_dir: str = "data", cache_dir: str = "cache", return_trades=False) -> pl.DataFrame | None:
    """
    Download trades for the last N days up to yesterday with a progress bar.

    Each day is fetched through ``download_and_unzip``. A failure on one day
    is reported through ``tqdm.write`` and the loop continues.

    Args:
        symbol: Symbol forwarded to ``download_and_unzip``.
        no_days: Number of consecutive days to fetch, ending yesterday.
        download_dir: Scratch directory forwarded to ``download_and_unzip``.
        cache_dir: Cache directory forwarded to ``download_and_unzip``.
        return_trades: When true, concatenate and return the downloaded days.
            The default only runs the downloads and returns ``None``.

    Returns:
        One DataFrame with all successfully downloaded days when
        ``return_trades`` is true, otherwise ``None``.

    Raises:
        ValueError: If ``return_trades`` is true and no day could be loaded.
    """
    # NOTE(review): "yesterday" comes from the local clock. If the archive is
    # keyed by UTC day, the newest day may not be published yet — confirm.
    yesterday = datetime.now() - timedelta(days=1)
    start_date = yesterday - timedelta(days=no_days - 1)

    dfs = []
    for i in tqdm(range(no_days), desc=f"Downloading {symbol}"):
        current_date = start_date + timedelta(days=i)
        try:
            if return_trades:
                dfs.append(download_and_unzip(symbol, current_date, download_dir, cache_dir))
            else:
                download_and_unzip(symbol, current_date, download_dir, cache_dir)
        except Exception as e:
            # Best effort: one missing day must not abort the whole run.
            tqdm.write(f"[ERROR] {symbol} {current_date.date()}: {e}")

    if not return_trades:
        return None
    if not dfs:
        # pl.concat rejects an empty list; fail with a message naming the cause.
        raise ValueError(f"no trade data could be loaded for {symbol} over the last {no_days} day(s)")
    return pl.concat(dfs)
|
||||||
|
|
||||||
|
|
||||||
|
def download_ohlc_timeseries(symbol: str, no_days: int, time_interval: str, download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Build a time series from the last N days of trades using ``research.OHLC_AGGS``.

    This is ``download_timeseries`` with the aggregations fixed to
    ``research.OHLC_AGGS``; the day loop, progress bar and per-day error
    reporting all come from that function.

    Args:
        symbol: Symbol forwarded to ``download_and_unzip``.
        no_days: Number of consecutive days to process, ending yesterday.
        time_interval: Interval specification forwarded unchanged to
            ``research.timeseries``.
        download_dir: Scratch directory forwarded to ``download_and_unzip``.
        cache_dir: Cache directory forwarded to ``download_and_unzip``.

    Returns:
        The concatenation of the per-day results of ``research.timeseries``.
    """
    return download_timeseries(
        symbol,
        no_days,
        time_interval,
        research.OHLC_AGGS,
        download_dir=download_dir,
        cache_dir=cache_dir,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def download_timeseries(symbol: str, no_days: int, time_interval: str, aggs: List[pl.Expr], download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Build a time series from the last N days of trades, up to yesterday.

    Each day's trades are loaded through ``download_and_unzip``, reduced with
    ``research.timeseries(trades, time_interval, aggs)`` and the per-day
    results are concatenated. A failure on one day is reported through
    ``tqdm.write`` and that day is left out of the result.

    Args:
        symbol: Symbol forwarded to ``download_and_unzip``.
        no_days: Number of consecutive days to process, ending yesterday.
        time_interval: Interval specification forwarded unchanged to
            ``research.timeseries``.
        aggs: Aggregation expressions forwarded to ``research.timeseries``.
        download_dir: Scratch directory forwarded to ``download_and_unzip``.
        cache_dir: Cache directory forwarded to ``download_and_unzip``.

    Returns:
        The concatenation of the per-day results of ``research.timeseries``.

    Raises:
        ValueError: If no day could be processed.
    """
    # NOTE(review): "yesterday" comes from the local clock. If the archive is
    # keyed by UTC day, the newest day may not be published yet — confirm.
    yesterday = datetime.now() - timedelta(days=1)
    start_date = yesterday - timedelta(days=no_days - 1)

    time_series = []
    for i in tqdm(range(no_days), desc=f"Downloading {symbol}"):
        current_date = start_date + timedelta(days=i)
        try:
            trades = download_and_unzip(symbol, current_date, download_dir, cache_dir)
            time_series.append(research.timeseries(trades, time_interval, aggs))
        except Exception as e:
            # Best effort: one missing day must not abort the whole run.
            tqdm.write(f"[ERROR] {symbol} {current_date.date()}: {e}")

    if not time_series:
        # pl.concat rejects an empty list; fail with a message naming the cause.
        raise ValueError(f"no time series could be built for {symbol} over the last {no_days} day(s)")
    return pl.concat(time_series)
|
||||||
@@ -229,15 +229,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
|
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "binance"
|
|
||||||
version = "0.3.110"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/55/d1/dd29ef0615e0a500657e832c81cc2e3bc9fbc6171fa6106f99cfa3c309fe/binance-0.3.110.tar.gz", hash = "sha256:90a09493cbf64700d78f5257da4ea671e9290068ff7422dfedf21df78ac3836b", size = 939171, upload-time = "2026-06-16T15:14:20.324Z" }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b6/e2/6ee1ba2e315ff02ea20a95448e3b9d476769577d31667223f535920cc102/binance-0.3.110-py3-none-any.whl", hash = "sha256:f9a7ffbd8d50a8d02057c3fa712e759865c186ca1dfdc7fb4b8016efdf80e109", size = 1201848, upload-time = "2026-06-16T15:14:18.958Z" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bleach"
|
name = "bleach"
|
||||||
version = "6.3.0"
|
version = "6.3.0"
|
||||||
@@ -2086,7 +2077,6 @@ source = { virtual = "." }
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiohttp" },
|
{ name = "aiohttp" },
|
||||||
{ name = "altair" },
|
{ name = "altair" },
|
||||||
{ name = "binance" },
|
|
||||||
{ name = "cryptography" },
|
{ name = "cryptography" },
|
||||||
{ name = "jupyter" },
|
{ name = "jupyter" },
|
||||||
{ name = "matplotlib" },
|
{ name = "matplotlib" },
|
||||||
@@ -2103,7 +2093,6 @@ dependencies = [
|
|||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "aiohttp", specifier = ">=3.14.1" },
|
{ name = "aiohttp", specifier = ">=3.14.1" },
|
||||||
{ name = "altair", specifier = ">=6.2.1" },
|
{ name = "altair", specifier = ">=6.2.1" },
|
||||||
{ name = "binance", specifier = ">=0.3.110" },
|
|
||||||
{ name = "cryptography", specifier = ">=49.0.0" },
|
{ name = "cryptography", specifier = ">=49.0.0" },
|
||||||
{ name = "jupyter", specifier = ">=1.1.1" },
|
{ name = "jupyter", specifier = ">=1.1.1" },
|
||||||
{ name = "matplotlib", specifier = ">=3.10.9" },
|
{ name = "matplotlib", specifier = ">=3.10.9" },
|
||||||
|
|||||||
Reference in New Issue
Block a user