# Reconstructed from the "part1 fixes" patch (commit dc22ebd), which adds this
# module as "tutorials/20260622164035_Let's Build a Quant Trading Strategy,
# MemLabs/binance.py".  The same patch also adds `cache/` and `*.parquet` to
# .gitignore and removes the `binance` dependency from pyproject.toml and
# uv.lock.
"""Download, cache and aggregate Binance futures trade data.

Daily trade archives are fetched from ``data.binance.vision``, converted to
Polars DataFrames and cached as parquet files so that each (symbol, day) pair
is only downloaded once.
"""
from datetime import datetime, timedelta
from pathlib import Path
from typing import Callable, List
import zipfile

import polars as pl
import requests
from tqdm import tqdm

import research

# Fee rates are not used inside this module; presumably they are fractions of
# traded notional (0.00045 == 0.045%) consumed by callers -- TODO confirm.
MAKER_FEE = 0.000450
TAKER_FEE = 0.000450

# (connect, read) timeouts in seconds.  Without a timeout a stalled connection
# would block the whole download loop forever.
_REQUEST_TIMEOUT = (10, 120)

_DATE_FORMAT = '%Y-%m-%d'


def download_and_unzip(symbol: str, date: str | datetime,
                       download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Download and unzip Binance futures trade data for a given symbol and date.
    Caches results as parquet files to avoid repeated downloads.

    Args:
        symbol: Symbol as it appears in the archive URL and file names.
        date: Day to fetch, either a ``datetime`` or a ``YYYY-MM-DD`` string.
        download_dir: Scratch directory for the temporary zip/csv files.
        cache_dir: Directory holding the per-day parquet cache.

    Returns:
        The day's trades with the columns ``id``, ``price``, ``qty``,
        ``quoteQty``, ``time``, ``isBuyerMaker`` plus a ``datetime`` column
        derived from ``time`` (epoch milliseconds).

    Raises:
        requests.HTTPError: If the archive cannot be downloaded (e.g. 404).
        requests.Timeout: If the server stops responding.
        KeyError: If the archive does not contain the expected CSV member.
    """
    # Normalize date to string
    date_str = date.strftime(_DATE_FORMAT) if isinstance(date, datetime) else date
    stem = f"{symbol}-trades-{date_str}"

    cache_root = Path(cache_dir)
    cache_root.mkdir(parents=True, exist_ok=True)
    cache_path = cache_root / f"{stem}.parquet"

    if cache_path.exists():
        return pl.read_parquet(cache_path)

    url = f"https://data.binance.vision/data/futures/um/daily/trades/{symbol}/{stem}.zip"

    work_dir = Path(download_dir)
    work_dir.mkdir(parents=True, exist_ok=True)
    zip_path = work_dir / f"{stem}.zip"
    csv_path = work_dir / f"{stem}.csv"
    # The cache is written under a temporary name and renamed afterwards, so an
    # interrupted write can never be mistaken for a valid cache entry.
    tmp_cache_path = cache_path.with_name(cache_path.name + ".tmp")

    try:
        # Download zip; the context manager releases the connection even when
        # the transfer fails half-way.
        with requests.get(url, stream=True, timeout=_REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Extract only the CSV we are going to read (and later delete).
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extract(csv_path.name, path=work_dir)

        # Load into Polars
        df = pl.read_csv(
            csv_path,
            schema={
                "id": pl.Int64,
                "price": pl.Float64,
                "qty": pl.Float64,
                "quoteQty": pl.Float64,
                "time": pl.Int64,
                "isBuyerMaker": pl.Boolean,
            }
        ).with_columns(
            pl.from_epoch("time", time_unit="ms").alias("datetime")
        )

        # Cache
        df.write_parquet(tmp_cache_path)
        tmp_cache_path.replace(cache_path)
    finally:
        # Clean up scratch files on success *and* failure.
        zip_path.unlink(missing_ok=True)
        csv_path.unlink(missing_ok=True)
        tmp_cache_path.unlink(missing_ok=True)

    return df


def _last_n_days(no_days: int) -> list[datetime]:
    """Return ``no_days`` consecutive datetimes, the last one being yesterday."""
    # NOTE(review): "yesterday" is taken from the local clock.  If the daily
    # archives are keyed by UTC day, machines ahead of UTC may request a file
    # that is not published yet (reported as a per-day [ERROR]) -- confirm.
    yesterday = datetime.now() - timedelta(days=1)
    start_date = yesterday - timedelta(days=no_days - 1)
    return [start_date + timedelta(days=i) for i in range(no_days)]


def _process_days(symbol: str, days: list[datetime], download_dir: str, cache_dir: str,
                  transform: Callable[[pl.DataFrame], object] | None = None) -> list:
    """Download every day in ``days`` and collect ``transform(trades)`` per day.

    With ``transform=None`` the trades are only downloaded (warming the
    cache) and nothing is collected.  A failing day is reported through
    ``tqdm.write`` and skipped, so one bad day does not abort the whole run.
    """
    results = []
    for day in tqdm(days, desc=f"Downloading {symbol}"):
        try:
            trades = download_and_unzip(symbol, day, download_dir, cache_dir)
            if transform is not None:
                results.append(transform(trades))
        except Exception as e:  # deliberate best-effort: log and continue
            tqdm.write(f"[ERROR] {symbol} {day.date()}: {e}")
    return results


def _keep(trades: pl.DataFrame) -> pl.DataFrame:
    """Identity transform used when the raw trades themselves are wanted."""
    return trades


def download_date_range(symbol: str, start_date: str | datetime, end_date: str | datetime,
                        download_dir: str = "data", cache_dir: str = "cache",
                        return_trades: bool = False) -> list[pl.DataFrame] | None:
    """
    Download trade data for a range of dates with a progress bar.

    Args:
        symbol: Symbol to download.
        start_date: First day (inclusive), ``datetime`` or ``YYYY-MM-DD``.
        end_date: Last day (inclusive), ``datetime`` or ``YYYY-MM-DD``.
        download_dir: Scratch directory for temporary files.
        cache_dir: Directory holding the per-day parquet cache.
        return_trades: When true, return one DataFrame per successfully
            loaded day.  Defaults to false, which only fills the cache and
            avoids holding every day in memory.

    Returns:
        A list of per-day DataFrames if ``return_trades`` is true, else
        ``None``.  Days that fail are logged and skipped.
    """
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, _DATE_FORMAT)
    if isinstance(end_date, str):
        end_date = datetime.strptime(end_date, _DATE_FORMAT)

    num_days = (end_date - start_date).days + 1
    days = [start_date + timedelta(days=i) for i in range(num_days)]

    frames = _process_days(symbol, days, download_dir, cache_dir,
                           _keep if return_trades else None)
    return frames if return_trades else None


def download_trades(symbol: str, no_days: int,
                    download_dir: str = "data", cache_dir: str = "cache",
                    return_trades: bool = False) -> pl.DataFrame | None:
    """
    Download trades for the last N days up to yesterday with a progress bar.

    Args:
        symbol: Symbol to download.
        no_days: Number of days to fetch, ending yesterday.
        download_dir: Scratch directory for temporary files.
        cache_dir: Directory holding the per-day parquet cache.
        return_trades: When true, return all loaded days concatenated into
            one DataFrame; when false (default) only fill the cache.

    Returns:
        The concatenated trades if ``return_trades`` is true, else ``None``.

    Raises:
        ValueError: If ``return_trades`` is true but no day could be loaded.
    """
    frames = _process_days(symbol, _last_n_days(no_days), download_dir, cache_dir,
                           _keep if return_trades else None)
    if not return_trades:
        return None
    if not frames:
        raise ValueError(
            f"No trade data could be loaded for {symbol} over the last {no_days} day(s)"
        )
    return pl.concat(frames)


def download_ohlc_timeseries(symbol: str, no_days: int, time_interval: str,
                             download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Build a time series for the last N days (up to yesterday) using
    ``research.OHLC_AGGS`` as the aggregations.

    Equivalent to ``download_timeseries(..., aggs=research.OHLC_AGGS)``; see
    that function for parameters, return value and errors.
    """
    return download_timeseries(symbol, no_days, time_interval, research.OHLC_AGGS,
                               download_dir, cache_dir)


def download_timeseries(symbol: str, no_days: int, time_interval: str, aggs: List[pl.Expr],
                        download_dir: str = "data", cache_dir: str = "cache") -> pl.DataFrame:
    """
    Download trades for the last N days up to yesterday and aggregate each
    day into a time series with a progress bar.

    Args:
        symbol: Symbol to download.
        no_days: Number of days to fetch, ending yesterday.
        time_interval: Interval passed through to ``research.timeseries``.
        aggs: Polars expressions passed through to ``research.timeseries``.
        download_dir: Scratch directory for temporary files.
        cache_dir: Directory holding the per-day parquet cache.

    Returns:
        The per-day results of ``research.timeseries`` concatenated in
        chronological order.  Days that fail to download or aggregate are
        logged and skipped.

    Raises:
        ValueError: If no day could be loaded and aggregated.
    """
    time_series = _process_days(
        symbol, _last_n_days(no_days), download_dir, cache_dir,
        lambda trades: research.timeseries(trades, time_interval, aggs),
    )
    if not time_series:
        raise ValueError(
            f"No time series could be built for {symbol} over the last {no_days} day(s)"
        )
    return pl.concat(time_series)