{ "cells": [ { "cell_type": "markdown", "id": "66be0f9b-942f-421c-882b-5cf6a8d225ef", "metadata": {}, "source": [ "# Creating an AI Trading Bot using Machine Learning with help of AI" ] }, { "cell_type": "markdown", "id": "f70ecd95-f883-40e1-a73d-1ade318c8783", "metadata": {}, "source": [ "## Used tools" ] }, { "cell_type": "markdown", "id": "314ddf94-059a-452a-8098-22b1ec78d5b1", "metadata": {}, "source": [ "- https://aistudio.google.com\n", "- https://alpaca.markets/\n", "- https://jupyter.org/\n", "- https://superai.pl/courses.html" ] }, { "cell_type": "markdown", "id": "2b8e5201-a668-4b6a-9be0-5172b5e67b95", "metadata": {}, "source": [ "## Creating AI Trading Bot with AI" ] }, { "cell_type": "markdown", "id": "9d07ca5d-c625-496a-bd5c-5efa72b3e404", "metadata": {}, "source": [ "### 1st Prompt" ] }, { "cell_type": "markdown", "id": "bccd6bdd-6e98-4f76-bd50-eb0f13f5636c", "metadata": {}, "source": [ "1. \"I would like to create a trading bot. It should be using machine learning to trade BTCUSD in one minute intervals and Alpaca Markets. Can you help me with that?\"\n", "\n", "(Remember: At the moment Alpaca uses different symbols for executing orders such as “BTC/USD” and checking portfolio positions such as “BTCUSD”.)\n" ] }, { "cell_type": "markdown", "id": "6cf0ec94-cdb3-47e5-918e-5ba2cfd6f16e", "metadata": {}, "source": [ "#### Keys to Alpaca" ] }, { "cell_type": "code", "execution_count": 1, "id": "7d232c4b-7f4a-4f9d-b557-1abe46047672", "metadata": {}, "outputs": [], "source": [ "KEY_ID = \"your API KEY\" #replace it with your own KEY_ID from Alpaca: https://alpaca.markets/\n", "SECRET_KEY = \"your SECRET KEY\" #replace it with your own SECRET_KEY from Alpaca" ] }, { "cell_type": "markdown", "id": "ea5c66d3-6c54-49a0-800e-7dd663fec83c", "metadata": {}, "source": [ "### 1st Attempt" ] }, { "cell_type": "code", "execution_count": 2, "id": "56fce22c-c529-40c1-ae46-3576bcb75475", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "id": "86801639-ce2c-4bac-bf23-94ab71cf70b7", "metadata": {}, "source": [ "Step 1: Alpaca Setup & Python Environment\n", "\n", " Sign up for an Alpaca account: https://alpaca.markets/\n", "\n", " Get your API Key ID and Secret Key from your paper trading account dashboard.\n", "\n", " Install necessary Python libraries:" ] }, { "cell_type": "code", "execution_count": 3, "id": "91850edf-7220-407e-a577-7a8cf4ee6ca9", "metadata": { "scrolled": true }, "outputs": [], "source": [ "pip install alpaca-trade-api pandas scikit-learn xgboost numpy pandas-ta" ] }, { "cell_type": "markdown", "id": "41adbca4-5621-4491-8541-3dd4f52eed2b", "metadata": {}, "source": [ "Set up API credentials (environment variables are best):" ] }, { "cell_type": "code", "execution_count": 4, "id": "bc5e6181-d205-476d-8c17-8cecda0c01ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connected to Alpaca Paper Trading.\n" ] } ], "source": [ "import os\n", "import alpaca_trade_api as tradeapi\n", "import pandas as pd\n", "from datetime import datetime, timedelta\n", "import time\n", "\n", "# For Paper Trading\n", "os.environ['APCA_API_BASE_URL'] = 'https://paper-api.alpaca.markets'\n", "# Replace with your actual keys or set them as environment variables\n", "os.environ['APCA_API_KEY_ID'] = 'your API KEY'\n", "os.environ['APCA_API_SECRET_KEY'] = 'your SECRET KEY'\n", "\n", "API_KEY = os.getenv('APCA_API_KEY_ID')\n", "API_SECRET = os.getenv('APCA_API_SECRET_KEY')\n", "BASE_URL = os.getenv('APCA_API_BASE_URL')\n", "\n", "api = tradeapi.REST(API_KEY, API_SECRET, BASE_URL, api_version='v2')\n", "\n", "print(\"Connected to Alpaca Paper Trading.\")\n", "# Check account\n", "# account = api.get_account()\n", "# print(f\"Account status: {account.status}\")" ] }, { "cell_type": "markdown", "id": "45682be7-b549-422b-8a7d-1d9dbd8f2672", "metadata": {}, "source": [ "Step 2: Data Acquisition" ] }, { "cell_type": "code", "execution_count": 5, "id": "923fea7c-910e-45cb-b19f-3b7a3002322c", "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "module 'alpaca_trade_api' has no attribute 'টাইমফ্রেম'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[5], line 61\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# Example usage:\u001b[39;00m\n\u001b[0;32m 60\u001b[0m symbol \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBTC/USD\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Alpaca uses \"BTC/USD\" for crypto pairs\u001b[39;00m\n\u001b[1;32m---> 61\u001b[0m timeframe \u001b[38;5;241m=\u001b[39m tradeapi\u001b[38;5;241m.\u001b[39m টাইমফ্রেম\u001b[38;5;241m.\u001b[39mমিনিট \u001b[38;5;66;03m# or \"1Min\" for older SDK versions\u001b[39;00m\n\u001b[0;32m 62\u001b[0m \u001b[38;5;66;03m# Fetch last 30 days of data for example\u001b[39;00m\n\u001b[0;32m 63\u001b[0m start_date \u001b[38;5;241m=\u001b[39m (datetime\u001b[38;5;241m.\u001b[39mnow() \u001b[38;5;241m-\u001b[39m timedelta(days\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m30\u001b[39m))\u001b[38;5;241m.\u001b[39mstrftime(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mY-\u001b[39m\u001b[38;5;124m%\u001b[39m\u001b[38;5;124mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", "\u001b[1;31mAttributeError\u001b[0m: module 'alpaca_trade_api' has no attribute 'টাইমফ্রেম'" ] } ], "source": [ "def fetch_data(symbol, timeframe, start_date_str, end_date_str=None):\n", " \"\"\"Fetches historical crypto data from Alpaca.\"\"\"\n", " if end_date_str is None:\n", " end_date_str = datetime.now().strftime('%Y-%m-%d')\n", "\n", " # Alpaca API expects ISO 8601 format for start/end times\n", " # And it has a limit on how many bars can be fetched per request (e.g., 10000 for crypto)\n", " # So we may need to fetch in chunks if requesting a long period.\n", "\n", " all_bars = []\n", " start_dt = pd.to_datetime(start_date_str, utc=True)\n", " end_dt = pd.to_datetime(end_date_str, utc=True)\n", "\n", " # Fetch data in chunks to avoid hitting API limits for very long periods\n", " # For 1-minute data, 10000 bars is about 7 days.\n", " # Let's fetch data in smaller chunks, e.g., 5 days at a time.\n", " current_start = start_dt\n", " while current_start < end_dt:\n", " chunk_end = min(current_start + timedelta(days=5), end_dt) # Adjust chunk size as needed\n", " print(f\"Fetching data from {current_start.isoformat()} to {chunk_end.isoformat()}\")\n", "\n", " # Alpaca's get_crypto_bars expects start and end in ISO format\n", " bars = api.get_crypto_bars(\n", " symbol,\n", " timeframe,\n", " start=current_start.isoformat(),\n", " end=chunk_end.isoformat(),\n", " limit=10000 # Max limit per request\n", " ).df\n", "\n", " if bars.empty:\n", " print(f\"No data found for chunk starting {current_start.isoformat()}\")\n", " if current_start + timedelta(days=5) > end_dt and not all_bars: # if first chunk and no data\n", " break\n", " current_start += timedelta(days=5) # Move to next chunk period\n", " time.sleep(1) # Be nice to the API\n", " continue\n", "\n", " all_bars.append(bars)\n", " # Update current_start to the timestamp of the last bar fetched + 1 minute\n", " # to avoid overlap and to ensure we move forward.\n", " if not bars.index.empty:\n", " current_start = bars.index[-1] + pd.Timedelta(minutes=1)\n", " else: # Should not happen if bars is not empty, but as a safeguard\n", " current_start += timedelta(days=5)\n", "\n", " time.sleep(1) # Respect API rate limits\n", "\n", " if not all_bars:\n", " print(\"No data fetched. Check your date range or symbol.\")\n", " return pd.DataFrame()\n", "\n", " df = pd.concat(all_bars)\n", " df = df[~df.index.duplicated(keep='first')] # Remove potential duplicates from chunking\n", " df = df.sort_index() # Ensure chronological order\n", " df = df[df.index <= end_dt] # Ensure we don't go past the requested end_dt\n", " return df\n", "\n", "# Example usage:\n", "symbol = \"BTC/USD\" # Alpaca uses \"BTC/USD\" for crypto pairs\n", "timeframe = tradeapi. টাইমফ্রেম.মিনিট # or \"1Min\" for older SDK versions\n", "# Fetch last 30 days of data for example\n", "start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')\n", "# end_date = datetime.now().strftime('%Y-%m-%d') # Fetches up to now\n", "\n", "btc_data = fetch_data(symbol, timeframe, start_date)\n", "if not btc_data.empty:\n", " print(f\"Fetched {len(btc_data)} rows of {symbol} data.\")\n", " print(btc_data.head())\n", " print(btc_data.tail())\n", "else:\n", " print(\"Failed to fetch data.\")" ] }, { "cell_type": "markdown", "id": "dc7a0229-e758-4df6-a8b1-1dbe94c6f7a5", "metadata": {}, "source": [ "Step 3: Feature Engineering\n", "\n", "This is where you create signals for your model. pandas-ta is a great library for this." ] }, { "cell_type": "code", "execution_count": null, "id": "c3eac019-211a-4630-95c9-9fcc61e358ec", "metadata": {}, "outputs": [], "source": [ "import pandas_ta as ta\n", "\n", "def create_features(df):\n", " if df.empty or 'close' not in df.columns:\n", " print(\"DataFrame is empty or 'close' column is missing.\")\n", " return df\n", "\n", " df.ta.sma(length=10, append=True, col_names=('SMA_10'))\n", " df.ta.sma(length=30, append=True, col_names=('SMA_30'))\n", " df.ta.ema(length=10, append=True, col_names=('EMA_10'))\n", " df.ta.rsi(length=14, append=True, col_names=('RSI_14'))\n", " df.ta.macd(append=True, col_names=('MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9'))\n", " df.ta.bbands(length=20, append=True, col_names=('BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0'))\n", " df.ta.atr(length=14, append=True, col_names=('ATR_14'))\n", "\n", " # Lagged returns\n", " for lag in [1, 3, 5, 10]:\n", " df[f'return_{lag}m'] = df['close'].pct_change(periods=lag)\n", "\n", " # Add more features: volatility, momentum, volume-based if available, etc.\n", " # e.g., log returns, price relative to moving average, etc.\n", "\n", " df.dropna(inplace=True) # Remove rows with NaNs created by indicators\n", " return df\n", "\n", "if not btc_data.empty:\n", " featured_data = create_features(btc_data.copy()) # Use .copy() to avoid modifying original\n", " print(\"\\nData with features:\")\n", " print(featured_data.head())\n", "else:\n", " print(\"Cannot create features, btc_data is empty.\")\n", " featured_data = pd.DataFrame() # ensure it's a DataFrame" ] }, { "cell_type": "markdown", "id": "8d1536cf-2ee0-4a30-ac4e-9fa82c8936c7", "metadata": {}, "source": [ "Step 4: Model Training - Defining Target & Training\n", "\n", "Let's define a simple target: will the price be higher or lower in N minutes?" ] }, { "cell_type": "code", "execution_count": null, "id": "8e773d98-f353-4ab4-b2a1-5d82024865b0", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split, TimeSeriesSplit\n", "from sklearn.ensemble import RandomForestClassifier # Example model\n", "from sklearn.metrics import accuracy_score, classification_report\n", "import xgboost as xgb\n", "\n", "def prepare_data_for_model(df, target_horizon=5, test_size=0.2):\n", " \"\"\"\n", " Prepares X (features) and y (target) for the ML model.\n", " Target: 1 if price increases by more than a small threshold after target_horizon periods, 0 otherwise.\n", " -1 if price decreases by more than a small threshold. (Optional: make it 3 classes)\n", " \"\"\"\n", " if df.empty or 'close' not in df.columns:\n", " print(\"DataFrame is empty or 'close' column is missing.\")\n", " return pd.DataFrame(), pd.Series(dtype='float64'), pd.DataFrame(), pd.Series(dtype='float64')\n", "\n", " # Define target: 1 if price goes up in `target_horizon` minutes, 0 otherwise\n", " # A small threshold can help avoid noise around 0% change\n", " # price_threshold = 0.0005 # e.g., 0.05% change\n", " # df['future_price'] = df['close'].shift(-target_horizon)\n", " # df['price_change'] = (df['future_price'] - df['close']) / df['close']\n", " # df['target'] = 0 # Hold\n", " # df.loc[df['price_change'] > price_threshold, 'target'] = 1 # Buy\n", " # df.loc[df['price_change'] < -price_threshold, 'target'] = -1 # Sell (for 3-class)\n", " # For 2-class (Up/Not Up):\n", " df['target'] = (df['close'].shift(-target_horizon) > df['close']).astype(int)\n", "\n", " df.dropna(inplace=True) # Remove rows with NaN target (due to shift)\n", "\n", " feature_columns = [col for col in df.columns if col not in ['open', 'high', 'low', 'close', 'volume', 'trade_count', 'vwap', 'target', 'future_price', 'price_change']]\n", " X = df[feature_columns]\n", " y = df['target']\n", "\n", " # Time series split is crucial: DO NOT shuffle time series data for training\n", " # For a simple split:\n", " split_index = int(len(X) * (1 - test_size))\n", " X_train, X_test = X[:split_index], X[split_index:]\n", " y_train, y_test = y[:split_index], y[split_index:]\n", "\n", " # For more robust cross-validation, use TimeSeriesSplit\n", " # tscv = TimeSeriesSplit(n_splits=5)\n", " # for train_index, test_index in tscv.split(X):\n", " # X_train, X_test = X.iloc[train_index], X.iloc[test_index]\n", " # y_train, y_test = y.iloc[train_index], y.iloc[test_index]\n", " # Train and evaluate your model here\n", "\n", " return X_train, X_test, y_train, y_test, feature_columns\n", "\n", "\n", "if not featured_data.empty:\n", " X_train, X_test, y_train, y_test, feature_cols = prepare_data_for_model(featured_data.copy(), target_horizon=5)\n", "\n", " if not X_train.empty:\n", " print(f\"\\nTraining data shape: X_train: {X_train.shape}, y_train: {y_train.shape}\")\n", " print(f\"Test data shape: X_test: {X_test.shape}, y_test: {y_test.shape}\")\n", " print(f\"Features used: {feature_cols}\")\n", "\n", " # Example Model: Random Forest\n", " # model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')\n", "\n", " # Example Model: XGBoost (often performs well)\n", " model = xgb.XGBClassifier(\n", " objective='binary:logistic', # or 'multi:softprob' for multi-class\n", " n_estimators=100,\n", " learning_rate=0.1,\n", " max_depth=3,\n", " use_label_encoder=False, # Suppress a warning\n", " eval_metric='logloss' # or 'mlogloss' for multi-class\n", " )\n", "\n", " model.fit(X_train, y_train)\n", "\n", " # Evaluate on test set\n", " y_pred = model.predict(X_test)\n", " print(\"\\nModel Evaluation on Test Set:\")\n", " print(f\"Accuracy: {accuracy_score(y_test, y_pred):.4f}\")\n", " print(classification_report(y_test, y_pred, zero_division=0))\n", "\n", " # Feature importance (for tree-based models)\n", " if hasattr(model, 'feature_importances_'):\n", " importances = pd.Series(model.feature_importances_, index=X_train.columns).sort_values(ascending=False)\n", " print(\"\\nFeature Importances:\")\n", " print(importances.head(10))\n", " else:\n", " print(\"Not enough data to create training/test sets after feature engineering and target creation.\")\n", "else:\n", " print(\"Cannot prepare data for model, featured_data is empty.\")\n", " model = None # Ensure model is defined even if training fails" ] }, { "cell_type": "markdown", "id": "d5279216-4774-4173-a268-a1ca92893ac8", "metadata": {}, "source": [ "Important Considerations for Modeling:\n", "\n", " Target Definition: This is critical. Predicting direction is hard. Predicting magnitude or using a threshold (e.g., price must move > 0.1% to be a \"1\") can be better.\n", "\n", " Class Imbalance: If \"up\" signals are rare, your model might be biased. Use techniques like class_weight='balanced' (for some models) or over/undersampling (e.g., SMOTE).\n", "\n", " Stationarity: Price series are generally non-stationary. Features like returns or indicators often help.\n", "\n", " Overfitting: Models can learn noise from historical data. Robust cross-validation (like TimeSeriesSplit) and regularization are key.\n", "\n", "Step 5: Backtesting (Simplified Vectorized Example)\n", "\n", "A proper backtest is event-driven and considers transaction costs, slippage, etc. This is a very simplified version." ] }, { "cell_type": "code", "execution_count": null, "id": "72a69e0a-484d-490f-8c95-195cccfe8e40", "metadata": {}, "outputs": [], "source": [ "def run_simple_backtest(df_with_predictions, initial_capital=10000, trade_size_usd=1000, transaction_cost_pct=0.003): # Alpaca crypto fee\n", " \"\"\"\n", " A very simplified vectorized backtest.\n", " Assumes df_with_predictions has a 'signal' column (1 for buy, 0 for hold/nothing, -1 for sell if implementing).\n", " For this example, we'll assume our model's prediction (0 or 1) is the signal.\n", " 1 = Go Long, 0 = Exit Long (or do nothing if not in position)\n", " \"\"\"\n", " if df_with_predictions.empty or 'predicted_signal' not in df_with_predictions.columns:\n", " print(\"DataFrame for backtest is empty or 'predicted_signal' column missing.\")\n", " return\n", "\n", " capital = initial_capital\n", " position_btc = 0 # Amount of BTC held\n", " portfolio_value = []\n", "\n", " # Assume 'predicted_signal' comes from your model (1 for predicted up, 0 for predicted down/neutral)\n", " # Let's assume a simple strategy: if signal is 1, buy. If signal is 0 and we have a position, sell.\n", "\n", " for i in range(len(df_with_predictions)):\n", " current_price = df_with_predictions['close'].iloc[i]\n", " signal = df_with_predictions['predicted_signal'].iloc[i]\n", "\n", " # Decision logic\n", " if signal == 1 and position_btc == 0: # Buy signal and no current position\n", " # Buy\n", " amount_to_buy_btc = trade_size_usd / current_price\n", " cost = amount_to_buy_btc * current_price * (1 + transaction_cost_pct)\n", " if capital >= cost:\n", " capital -= cost\n", " position_btc += amount_to_buy_btc\n", " # print(f\"{df_with_predictions.index[i]}: BUY {amount_to_buy_btc:.6f} BTC @ {current_price:.2f}\")\n", "\n", " elif signal == 0 and position_btc > 0: # Sell signal (or neutral) and have a position\n", " # Sell\n", " proceeds = position_btc * current_price * (1 - transaction_cost_pct)\n", " capital += proceeds\n", " # print(f\"{df_with_predictions.index[i]}: SELL {position_btc:.6f} BTC @ {current_price:.2f}\")\n", " position_btc = 0\n", "\n", " current_portfolio_value = capital + (position_btc * current_price)\n", " portfolio_value.append(current_portfolio_value)\n", "\n", " df_with_predictions['portfolio_value'] = portfolio_value\n", " print(\"\\nBacktest Results:\")\n", " print(f\"Initial Capital: ${initial_capital:.2f}\")\n", " print(f\"Final Portfolio Value: ${df_with_predictions['portfolio_value'].iloc[-1]:.2f}\")\n", " returns = (df_with_predictions['portfolio_value'].iloc[-1] / initial_capital - 1) * 100\n", " print(f\"Total Return: {returns:.2f}%\")\n", "\n", " # Plotting (optional)\n", " # import matplotlib.pyplot as plt\n", " # plt.figure(figsize=(12,6))\n", " # plt.plot(df_with_predictions.index, df_with_predictions['portfolio_value'])\n", " # plt.title('Portfolio Value Over Time')\n", " # plt.xlabel('Date')\n", " # plt.ylabel('Portfolio Value ($)')\n", " # plt.show()\n", "\n", "if model and not X_test.empty:\n", " # Use the model to predict on the entire test set for backtesting\n", " # For a more realistic backtest, you'd re-train periodically or use a walk-forward approach.\n", " # Here, we're just using the single model trained on X_train.\n", " all_featured_data_for_backtest = featured_data.loc[X_test.index].copy() # Get original data rows for X_test\n", " all_featured_data_for_backtest['predicted_signal'] = model.predict(X_test) # Use the trained model\n", "\n", " run_simple_backtest(all_featured_data_for_backtest)\n", "else:\n", " print(\"Skipping backtest as model or test data is not available.\")" ] }, { "cell_type": "markdown", "id": "8f950f19-ea56-4d27-bb0c-1aa029f6f909", "metadata": {}, "source": [ "Backtesting Libraries: For more serious backtesting, consider backtrader or zipline-reloaded. They handle many complexities.\n", "\n", "Step 6: Signal Generation & Order Execution (Live/Paper Trading)\n", "\n", "This is where you'd run the bot periodically (e.g., every minute)." ] }, { "cell_type": "code", "execution_count": null, "id": "9454be39-70d8-49ed-a814-8695d69e32b2", "metadata": {}, "outputs": [], "source": [ "SYMBOL = \"BTC/USD\"\n", "TRADE_QTY_USD = 100 # Amount in USD to trade per signal. Adjust based on risk tolerance.\n", "TARGET_HORIZON_MINUTES = 5 # Same as used in training\n", "\n", "# Global model and feature_cols (assuming they are trained and available)\n", "# model = ... (your trained model)\n", "# feature_cols = ... (list of feature column names used for training)\n", "\n", "def get_latest_bar_features():\n", " \"\"\"Fetches latest bars, calculates features for the most recent one.\"\"\"\n", " # Fetch enough data to calculate all features (e.g., max lookback of your indicators)\n", " # If SMA_30 is longest, need at least 30 + target_horizon previous bars\n", " # Let's fetch more to be safe, e.g., 100 bars\n", " now = datetime.now()\n", " start_fetch_dt = (now - timedelta(minutes=150)).strftime('%Y-%m-%d %H:%M:%S') # fetch last 150 mins\n", "\n", " latest_bars_df = api.get_crypto_bars(\n", " SYMBOL,\n", " tradeapi. টাইমফ্রেম.মিনিট,\n", " start=start_fetch_dt, # Alpaca needs ISO format with T\n", " # end defaults to now\n", " limit=150 # fetch a bit more than needed for features\n", " ).df\n", " \n", " if latest_bars_df.empty or len(latest_bars_df) < 35: # Min needed for SMA_30 + some buffer\n", " print(\"Not enough recent bars to calculate features.\")\n", " return None\n", "\n", " featured_bars = create_features(latest_bars_df.copy())\n", " if featured_bars.empty:\n", " print(\"Failed to create features for latest bars.\")\n", " return None\n", " # Return only the features for the most recent complete bar\n", " return featured_bars[feature_cols].iloc[-1:] # Return as DataFrame\n", "\n", "def check_and_place_trade():\n", " global model, feature_cols # Ensure these are accessible\n", "\n", " if model is None or feature_cols is None:\n", " print(\"Model not trained or feature columns not defined. Skipping trade check.\")\n", " return\n", "\n", " print(f\"\\n{datetime.now()}: Checking for trading signal...\")\n", " current_features_df = get_latest_bar_features()\n", "\n", " if current_features_df is None or current_features_df.empty:\n", " print(\"Could not get features for the latest bar.\")\n", " return\n", "\n", " # Ensure columns are in the same order as during training\n", " current_features_df = current_features_df[feature_cols]\n", "\n", " prediction = model.predict(current_features_df)\n", " signal = prediction[0] # 0 for down/neutral, 1 for up\n", "\n", " print(f\"Raw features for prediction: {current_features_df.iloc[0].to_dict()}\")\n", " print(f\"Model prediction: {signal}\")\n", "\n", " try:\n", " positions = api.list_positions()\n", " btc_position = next((p for p in positions if p.symbol == SYMBOL), None)\n", " current_price_info = api.get_latest_crypto_quote(SYMBOL) # Use quote for more current price\n", " current_price = (current_price_info.ap + current_price_info.bp) / 2 # Mid price\n", "\n", " if not current_price:\n", " print(\"Could not get current price for BTC/USD.\")\n", " return\n", "\n", "\n", " if signal == 1: # Predicted UP - Potential BUY\n", " if btc_position is None or float(btc_position.qty) == 0:\n", " qty_to_buy = TRADE_QTY_USD / current_price\n", " print(f\"BUY signal. Attempting to buy {qty_to_buy:.6f} {SYMBOL} at ~${current_price:.2f}\")\n", " api.submit_order(\n", " symbol=SYMBOL,\n", " qty=round(qty_to_buy, 6), # Alpaca crypto needs precision\n", " side='buy',\n", " type='market',\n", " time_in_force='gtc' # Good 'til canceled\n", " )\n", " print(\"BUY order submitted.\")\n", " else:\n", " print(f\"BUY signal, but already have a position of {btc_position.qty} {SYMBOL}. Holding.\")\n", "\n", " elif signal == 0: # Predicted DOWN/NEUTRAL - Potential SELL\n", " if btc_position and float(btc_position.qty) > 0:\n", " qty_to_sell = float(btc_position.qty) # Sell entire position\n", " print(f\"SELL signal. Attempting to sell {qty_to_sell:.6f} {SYMBOL} at ~${current_price:.2f}\")\n", " api.submit_order(\n", " symbol=SYMBOL,\n", " qty=round(qty_to_sell, 6),\n", " side='sell',\n", " type='market',\n", " time_in_force='gtc'\n", " )\n", " print(\"SELL order submitted.\")\n", " else:\n", " print(\"SELL signal, but no open position to sell. Doing nothing.\")\n", " else:\n", " print(\"Neutral signal or unrecognized signal. Doing nothing.\")\n", "\n", " except Exception as e:\n", " print(f\"Error during trade execution: {e}\")\n", "\n", "# Main loop (very basic scheduler)\n", "# For a robust bot, use APScheduler or run it in a more managed environment (e.g., cloud server with cron)\n", "if __name__ == \"__main__\" and model is not None: # Ensure model is trained\n", " # This is a simplified loop. In a real bot, you'd schedule this.\n", " # For example, using APScheduler to run exactly at the start of each minute.\n", " print(\"Starting dummy trading loop (runs a few times for demo). Press Ctrl+C to stop.\")\n", " print(\"IMPORTANT: This is for PAPER TRADING ONLY.\")\n", " print(f\"Will use model: {type(model).__name__} and features: {feature_cols}\")\n", " try:\n", " # Initial run\n", " check_and_place_trade()\n", " for i in range(5): # Run for a few iterations for demo\n", " # Wait for the next minute (approximately)\n", " # A more precise scheduler (like APScheduler) is better for live trading\n", " time.sleep(60)\n", " check_and_place_trade()\n", " except KeyboardInterrupt:\n", " print(\"Trading loop stopped by user.\")\n", " except Exception as e:\n", " print(f\"An error occurred in the trading loop: {e}\")\n", "else:\n", " if model is None:\n", " print(\"Model is not trained. Cannot start trading loop.\")" ] }, { "cell_type": "markdown", "id": "8236c202-3ecf-4e5b-b1cf-f0e0ea222b80", "metadata": {}, "source": [ "Step 7: Risk Management (Conceptual)\n", "\n", " Position Sizing: Don't risk too much on a single trade (e.g., TRADE_QTY_USD should be a small % of your paper capital).\n", "\n", " Stop-Loss: Automatically sell if the price moves against you by a certain percentage or dollar amount after entering a trade. Alpaca supports stop-loss orders." ] }, { "cell_type": "code", "execution_count": null, "id": "aac9b668-35a3-4a53-ab57-c785cd7e4f42", "metadata": {}, "outputs": [], "source": [ "# Example of a market buy order with a trailing stop-loss\n", "# api.submit_order(\n", "# symbol=SYMBOL,\n", "# qty=qty_to_buy,\n", "# side='buy',\n", "# type='market',\n", "# time_in_force='day',\n", "# trail_percent='1.5' # Trail stop loss 1.5% below high water mark\n", "# )" ] }, { "cell_type": "markdown", "id": "327d8404-15c6-48fc-aeee-db45ece2df81", "metadata": {}, "source": [ " Take-Profit: Automatically sell if the price moves in your favor by a certain amount.\n", "\n", " Max Drawdown: If your total capital drops by X%, stop trading and re-evaluate.\n", "\n", "Step 8: Deployment & Monitoring\n", "\n", " Server: Run your bot on a reliable machine or cloud server (AWS EC2, Google Cloud, etc.) that's always on.\n", "\n", " Scheduling: Use cron (Linux) or APScheduler (Python library) to run your check_and_place_trade function every minute precisely.\n", "\n", " Logging: Log every decision, trade, error, and API response.\n", "\n", " Monitoring: Track P&L, number of trades, win rate, errors, API connectivity, etc. Set up alerts.\n", "\n", "Very Important Next Steps & Considerations:\n", "\n", " PAPER TRADE EXTENSIVELY. I cannot stress this enough.\n", "\n", " Improve Feature Engineering: This is key. Explore more advanced features, time-series properties, etc.\n", "\n", " Hyperparameter Tuning: Use GridSearchCV or RandomizedSearchCV (or Optuna/Hyperopt) to find optimal model parameters.\n", "\n", " Robust Backtesting: Implement a proper event-driven backtester that accounts for slippage and realistic transaction costs.\n", "\n", " Walk-Forward Optimization: Markets change. Periodically re-train your model on newer data and test on out-of-sample data that follows.\n", "\n", " Understand Alpaca's Crypto Trading Rules: Fees (currently 0.3% for taker/maker on basic accounts), minimum order sizes, precision for quantities.\n", "\n", " Error Handling & Resilience: Your bot needs to handle API errors, network issues, unexpected data, etc.\n", "\n", " The 1-Minute Problem:\n", "\n", " Noise: Extremely high noise-to-signal ratio.\n", "\n", " Latency: Your reaction time vs. market speed.\n", "\n", " Transaction Costs: Can quickly kill profitability on small moves. Your average win needs to be significantly larger than costs.\n", "\n", " Alternative ML Approaches:\n", "\n", " Reinforcement Learning: Can learn trading policies directly. Very complex.\n", "\n", " LSTMs/Transformers: For sequence modeling, but require significant data and expertise.\n", "\n", " Simplicity First: Start with a very simple model and strategy. Complexity doesn't always mean better results and makes debugging harder.\n", "\n", "This is a massive undertaking. Be patient, learn continuously, and manage your risk (even paper money risk, as it builds habits). Good luck!" ] }, { "cell_type": "markdown", "id": "0a7209fb-f7aa-4eb7-aacc-13a929b9e268", "metadata": {}, "source": [ "
" ] }, { "cell_type": "markdown", "id": "209900ba-c211-4dbb-8356-03b587efc15d", "metadata": {}, "source": [ "### 2nd Prompt" ] }, { "cell_type": "markdown", "id": "b1072761-203f-42b3-93be-b53a5c73483d", "metadata": {}, "source": [ "2. After step 2: Data Acquisition I got an error like this: ---------------------------------------------------------------------------\n", "AttributeError Traceback (most recent call last)\n", "Cell In[5], line 61\n", "59 # Example usage:\n", "60 symbol = \"BTC/USD\" # Alpaca uses \"BTC/USD\" for crypto pairs\n", "---> 61 timeframe = tradeapi. টাইমফ্রেম.মিনিট # or \"1Min\" for older SDK versions\n", "62 # Fetch last 30 days of data for example\n", "63 start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')\n", "\n", "AttributeError: module 'alpaca_trade_api' has no attribute 'টাইমফ্রেম'.\n", "Can you fix it?" ] }, { "cell_type": "markdown", "id": "d60e7a15-2ad6-4b70-b797-ed70113f6302", "metadata": {}, "source": [ "### 2nd Attempt" ] }, { "cell_type": "code", "execution_count": 1, "id": "e60edfaf-ba96-4553-baf2-dbd2b0ec90ec", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 2, "id": "2e661658-394d-48cf-b607-8ffe888551e3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Attempting to connect to Alpaca Paper Trading...\n", "Successfully connected. Account status: ACTIVE\n", "Fetching data from 2025-05-02T00:00:00+00:00 to 2025-05-08T00:00:00+00:00\n", "Fetching data from 2025-05-08T00:01:00+00:00 to 2025-05-09T00:00:00+00:00\n", "\n", "Fetched 4349 rows of BTC/USD data.\n", "Sample data (first 5 rows):\n", " close high low trade_count \\\n", "timestamp \n", "2025-05-02 00:00:00+00:00 96599.4770 96599.4770 96599.4770 0 \n", "2025-05-02 00:02:00+00:00 96650.8100 96650.8100 96650.8100 0 \n", "2025-05-02 00:03:00+00:00 96588.0115 96588.0115 96588.0115 0 \n", "2025-05-02 00:05:00+00:00 96639.6100 96639.6100 96639.6100 0 \n", "2025-05-02 00:08:00+00:00 96521.6100 96584.7550 96521.6100 0 \n", "\n", " open volume vwap symbol \n", "timestamp \n", "2025-05-02 00:00:00+00:00 96599.4770 0.0 96599.4770 BTC/USD \n", "2025-05-02 00:02:00+00:00 96650.8100 0.0 96650.8100 BTC/USD \n", "2025-05-02 00:03:00+00:00 96588.0115 0.0 96588.0115 BTC/USD \n", "2025-05-02 00:05:00+00:00 96639.6100 0.0 96639.6100 BTC/USD \n", "2025-05-02 00:08:00+00:00 96584.7550 0.0 96553.1825 BTC/USD \n", "\n", "Sample data (last 5 rows):\n", " close high low trade_count \\\n", "timestamp \n", "2025-05-08 23:55:00+00:00 103121.6815 103121.6815 103121.6815 0 \n", "2025-05-08 23:56:00+00:00 103170.6190 103170.6190 103170.6190 0 \n", "2025-05-08 23:57:00+00:00 103204.4250 103204.4250 103204.4250 0 \n", "2025-05-08 23:58:00+00:00 103284.7150 103284.7150 103284.7150 0 \n", "2025-05-09 00:00:00+00:00 103206.1315 103267.4635 103196.4550 1 \n", "\n", " open volume vwap symbol \n", "timestamp \n", "2025-05-08 23:55:00+00:00 103121.6815 0.000000 103121.6815 BTC/USD \n", "2025-05-08 23:56:00+00:00 103170.6190 0.000000 103170.6190 BTC/USD \n", "2025-05-08 23:57:00+00:00 103204.4250 0.000000 103204.4250 BTC/USD \n", "2025-05-08 23:58:00+00:00 103284.7150 0.000000 103284.7150 BTC/USD \n", "2025-05-09 00:00:00+00:00 103230.7000 0.000012 103230.7000 BTC/USD \n" ] } ], "source": [ "import os\n", "import alpaca_trade_api as tradeapi\n", "import pandas as pd\n", "from datetime import datetime, timedelta\n", "import time\n", "\n", "# For Paper Trading\n", "os.environ['APCA_API_BASE_URL'] = 'https://paper-api.alpaca.markets'\n", "# Replace with your actual keys or set them as environment variables\n", "# Make sure these are set correctly if you haven't already\n", "os.environ['APCA_API_KEY_ID'] = 'your API KEY'\n", "os.environ['APCA_API_SECRET_KEY'] = 'your SECRET KEY'\n", "# os.environ['APCA_API_KEY_ID'] = 'YOUR_PAPER_API_KEY_ID'\n", "# os.environ['APCA_API_SECRET_KEY'] = 'YOUR_PAPER_API_SECRET_KEY'\n", "\n", "API_KEY = os.getenv('APCA_API_KEY_ID')\n", "API_SECRET = os.getenv('APCA_API_SECRET_KEY')\n", "BASE_URL = os.getenv('APCA_API_BASE_URL')\n", "\n", "# Ensure API_KEY and API_SECRET are not None\n", "if not API_KEY or not API_SECRET:\n", " raise ValueError(\"API_KEY or API_SECRET not set. Please set them as environment variables or directly in the script.\")\n", "\n", "api = tradeapi.REST(API_KEY, API_SECRET, BASE_URL, api_version='v2')\n", "\n", "print(\"Attempting to connect to Alpaca Paper Trading...\")\n", "try:\n", " account = api.get_account()\n", " print(f\"Successfully connected. Account status: {account.status}\")\n", "except Exception as e:\n", " print(f\"Failed to connect or get account info: {e}\")\n", " # Exit or handle if connection fails\n", " exit()\n", "\n", "\n", "def fetch_data(symbol, timeframe_enum, start_date_str, end_date_str=None): # Changed timeframe to timeframe_enum\n", " \"\"\"Fetches historical crypto data from Alpaca.\"\"\"\n", " if end_date_str is None:\n", " end_date_str = datetime.now().strftime('%Y-%m-%d')\n", "\n", " all_bars = []\n", " start_dt = pd.to_datetime(start_date_str, utc=True)\n", " end_dt = pd.to_datetime(end_date_str, utc=True)\n", "\n", " current_start = start_dt\n", " while current_start < end_dt:\n", " # Calculate chunk_end, ensuring it doesn't exceed end_dt\n", " # For 1-minute data, 10000 bars is approx 6.94 days. Let's use 6 days to be safe.\n", " chunk_end_candidate = current_start + timedelta(days=6)\n", " chunk_end = min(chunk_end_candidate, end_dt)\n", "\n", " print(f\"Fetching data from {current_start.isoformat()} to {chunk_end.isoformat()}\")\n", "\n", " try:\n", " bars = api.get_crypto_bars(\n", " symbol,\n", " timeframe_enum, # Use the passed enum\n", " start=current_start.isoformat(),\n", " end=chunk_end.isoformat(), # Ensure end is also passed\n", " limit=10000\n", " ).df\n", " except Exception as e:\n", " print(f\"Error fetching data chunk: {e}\")\n", " # Decide how to handle: break, retry, or skip chunk\n", " current_start = chunk_end # Move to next potential period\n", " time.sleep(5) # Wait longer if an error occurred\n", " continue\n", "\n", "\n", " if bars.empty:\n", " print(f\"No data found for chunk starting {current_start.isoformat()}\")\n", " if current_start >= end_dt and not all_bars: # if first chunk and no data\n", " break\n", " current_start = chunk_end # Move to next chunk period\n", " time.sleep(1) # Be nice to the API\n", " continue\n", "\n", " all_bars.append(bars)\n", " if not bars.index.empty:\n", " # Move current_start to the timestamp of the last bar fetched + 1 unit of timeframe\n", " # For TimeFrame.Minute, this is +1 minute\n", " if timeframe_enum == tradeapi.TimeFrame.Minute:\n", " current_start = bars.index[-1].to_pydatetime() + pd.Timedelta(minutes=1)\n", " elif timeframe_enum == tradeapi.TimeFrame.Hour:\n", " current_start = bars.index[-1].to_pydatetime() + pd.Timedelta(hours=1)\n", " # Add other timeframes if needed\n", " else: # Default for Day or others\n", " current_start = bars.index[-1].to_pydatetime() + pd.Timedelta(days=1)\n", "\n", " # Ensure current_start does not go beyond end_dt excessively in the loop condition\n", " if current_start > end_dt and chunk_end_candidate >= end_dt :\n", " break\n", " else: # Should not happen if bars is not empty, but as a safeguard\n", " current_start = chunk_end\n", "\n", " time.sleep(1) # Respect API rate limits\n", "\n", " if not all_bars:\n", " print(\"No data fetched. Check your date range, symbol, or API connection.\")\n", " return pd.DataFrame()\n", "\n", " df = pd.concat(all_bars)\n", " df = df[~df.index.duplicated(keep='first')] # Remove potential duplicates\n", " df = df.sort_index() # Ensure chronological order\n", "\n", " # Filter to ensure we are within the originally requested start_dt and end_dt precisely\n", " df = df[(df.index >= start_dt) & (df.index <= end_dt)]\n", " return df\n", "\n", "# Example usage:\n", "symbol = \"BTC/USD\" # Alpaca uses \"BTC/USD\" for crypto pairs\n", "# CORRECTED TIMEFRAME:\n", "timeframe_to_use = tradeapi.TimeFrame.Minute # Corrected\n", "# Alternative: timeframe_to_use = \"1Min\" (as a string, for some SDK versions or if TimeFrame enum is problematic)\n", "\n", "# Fetch last 7 days of data for example (a smaller range for quicker testing)\n", "start_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')\n", "# end_date = datetime.now().strftime('%Y-%m-%d') # Fetches up to now by default if None\n", "\n", "btc_data = fetch_data(symbol, timeframe_to_use, start_date)\n", "\n", "if not btc_data.empty:\n", " print(f\"\\nFetched {len(btc_data)} rows of {symbol} data.\")\n", " print(\"Sample data (first 5 rows):\")\n", " print(btc_data.head())\n", " print(\"\\nSample data (last 5 rows):\")\n", " print(btc_data.tail())\n", "else:\n", " print(\"Failed to fetch data or no data available for the period.\")" ] }, { "cell_type": "markdown", "id": "2dd66ca6-84d0-4ecb-b0be-910aba094c32", "metadata": {}, "source": [ "Key changes:\n", "\n", " timeframe = tradeapi.TimeFrame.Minute: This is the crucial fix.\n", "\n", " I renamed the timeframe parameter in fetch_data to timeframe_enum to avoid confusion if you were to pass the string version.\n", "\n", " Added a check for API_KEY and API_SECRET to ensure they are loaded.\n", "\n", " Improved the connection check with a try-except block.\n", "\n", " Refined the chunking logic in fetch_data slightly, particularly how current_start is advanced and how chunk_end is determined to better respect end_dt.\n", "\n", " Made the advancement of current_start adaptive to the timeframe_enum.\n", "\n", " Final filtering of the concatenated DataFrame to strictly adhere to start_dt and end_dt.\n", "\n", " Reduced the default fetch range in the example to 7 days for faster initial testing.\n", "\n", "Important:\n", "You will also need to make the same correction in Step 6 (Signal Generation & Order Execution) inside the get_latest_bar_features function:" ] }, { "cell_type": "markdown", "id": "4e2810ef-1af9-4406-b2f8-7e025fc20d83", "metadata": {}, "source": [ "Step 3: Feature Engineering\n", "\n", "This is where you create signals for your model. pandas-ta is a great library for this." ] }, { "cell_type": "code", "execution_count": 3, "id": "00bea5b7-3fa8-475d-86c8-4f67199e06c4", "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "cannot import name 'NaN' from 'numpy' (C:\\Users\\micro\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\numpy\\__init__.py)", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mta\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_features\u001b[39m(df):\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m df\u001b[38;5;241m.\u001b[39mempty \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclose\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mcolumns:\n", "File \u001b[1;32m~\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\pandas_ta\\__init__.py:116\u001b[0m\n\u001b[0;32m 97\u001b[0m EXCHANGE_TZ \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 98\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNZSX\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m12\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mASX\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m11\u001b[39m,\n\u001b[0;32m 99\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTSE\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m9\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHKE\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m8\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSSE\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m8\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSGX\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m8\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 102\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBMF\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNYSE\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m4\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTSX\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m4\u001b[39m\n\u001b[0;32m 103\u001b[0m }\n\u001b[0;32m 105\u001b[0m RATE \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 106\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDAYS_PER_MONTH\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m21\u001b[39m,\n\u001b[0;32m 107\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMINUTES_PER_HOUR\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m60\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 113\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYEARLY\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;241m1\u001b[39m,\n\u001b[0;32m 114\u001b[0m }\n\u001b[1;32m--> 116\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n", "File \u001b[1;32m~\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\pandas_ta\\core.py:18\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcandles\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcycles\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m---> 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmomentum\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moverlap\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mperformance\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n", "File \u001b[1;32m~\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\pandas_ta\\momentum\\__init__.py:34\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msmi\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m smi\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msqueeze\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m squeeze\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msqueeze_pro\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m squeeze_pro\n\u001b[0;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m stc\n\u001b[0;32m 36\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstoch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m stoch\n", "File \u001b[1;32m~\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\pandas_ta\\momentum\\squeeze_pro.py:2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# -*- coding: utf-8 -*-\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NaN \u001b[38;5;28;01mas\u001b[39;00m npNaN\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas_ta\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmomentum\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mom\n", "\u001b[1;31mImportError\u001b[0m: cannot import name 'NaN' from 'numpy' (C:\\Users\\micro\\anaconda3\\envs\\alpaca_AI_env\\Lib\\site-packages\\numpy\\__init__.py)" ] } ], "source": [ "import pandas_ta as ta\n", "\n", "def create_features(df):\n", " if df.empty or 'close' not in df.columns:\n", " print(\"DataFrame is empty or 'close' column is missing.\")\n", " return df\n", "\n", " df.ta.sma(length=10, append=True, col_names=('SMA_10'))\n", " df.ta.sma(length=30, append=True, col_names=('SMA_30'))\n", " df.ta.ema(length=10, append=True, col_names=('EMA_10'))\n", " df.ta.rsi(length=14, append=True, col_names=('RSI_14'))\n", " df.ta.macd(append=True, col_names=('MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9'))\n", " df.ta.bbands(length=20, append=True, col_names=('BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0'))\n", " df.ta.atr(length=14, append=True, col_names=('ATR_14'))\n", "\n", " # Lagged returns\n", " for lag in [1, 3, 5, 10]:\n", " df[f'return_{lag}m'] = df['close'].pct_change(periods=lag)\n", "\n", " # Add more features: volatility, momentum, volume-based if available, etc.\n", " # e.g., log returns, price relative to moving average, etc.\n", "\n", " df.dropna(inplace=True) # Remove rows with NaNs created by indicators\n", " return df\n", "\n", "if not btc_data.empty:\n", " featured_data = create_features(btc_data.copy()) # Use .copy() to avoid modifying original\n", " print(\"\\nData with features:\")\n", " print(featured_data.head())\n", "else:\n", " print(\"Cannot create features, btc_data is empty.\")\n", " featured_data = pd.DataFrame() # ensure it's a DataFrame" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }