Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kernels get killed OOM when running 1min data REG_CN #1826

Open
DanielKui opened this issue Jul 10, 2024 · 1 comment
Open

kernels get killed OOM when running 1min data REG_CN #1826

DanielKui opened this issue Jul 10, 2024 · 1 comment
Labels
question Further information is requested

Comments

@DanielKui
Copy link

DanielKui commented Jul 10, 2024

I'm running 1-min data with the script below. Swap memory is 128G, but the process still gets killed by the OOM killer — why?
start_time: 2020-09-14
end_time: 2021-06-21

all.txt contains 4067 stocks (attached at the end).
I downloaded this 1-min data via:
python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

I used 8 kernels

how should I configure?

#!/usr/bin/env python
"""Train an LGBModel on 1-min REG_CN data with Alpha158 features, then backtest.

Workflow: init qlib with the 1-min CN data, build the stock list, construct an
Alpha158 handler, train a LightGBM model, record the experiment, and finally
run a 1-min SimulatorExecutor backtest with a TopkDropoutStrategy.
"""
import os
import qlib

import pandas as pd
from qlib.contrib.data.handler import Alpha158
from qlib.data.dataset import TSDatasetH
from qlib.contrib.model.pytorch_alstm_ts import ALSTM
from qlib.tests.data import GetData
from qlib.constant import REG_CN
from qlib.data import D
from qlib.contrib.report import analysis_model, analysis_position
import multiprocessing
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

# NOTE(review): was `if name == "main":` in the paste — markdown stripped the
# dunder underscores; without them the script raises NameError immediately.
if __name__ == "__main__":
    multiprocessing.freeze_support()

    provider_uri = "~/.qlib/qlib_data/cn_data_1min"  # target_dir

    qlib.init(provider_uri=provider_uri)

    # Download the data only if it is not already present, then re-init with
    # the run-time settings (8 worker kernels, caches disabled).
    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    qlib.init(provider_uri=provider_uri, region=REG_CN, kernels=8, expression_cache=None, dataset_cache=None)

    D.calendar(start_time='2020-09-14', end_time='2021-06-21', freq='1min')
    benchmark = "SH000300"

    instruments = D.instruments(market='all')

    stock_list = D.list_instruments(
        instruments=instruments,
        start_time='2020-09-14',
        end_time='2021-06-21',
        freq='1min',
        as_list=True,
    )

    # Set the date range, stock pool and other handler parameters.
    data_handler_config = {
        "start_time": "2020-09-14",
        "end_time": "2021-06-21",
        "fit_start_time": "2020-09-20",
        "fit_end_time": "2021-06-18",
        "freq": '1min',
        "instruments": stock_list,
    }

    h = Alpha158(**data_handler_config)

    # Materialize the feature matrix (factor values) for inspection.
    Alpha158_df_feature = h.fetch(col_set="feature")

    task = {
        "model": {
            "class": "LGBModel",
            "module_path": "qlib.contrib.model.gbdt",
            "kwargs": {
                "loss": "mse",
                "colsample_bytree": 0.8879,
                "learning_rate": 0.0421,
                "subsample": 0.8789,
                "lambda_l1": 205.6999,
                "lambda_l2": 580.9768,
                "max_depth": 8,
                "num_leaves": 210,
                "num_threads": 20,
            },
        },
        "dataset": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "Alpha158",
                    "module_path": "qlib.contrib.data.handler",
                    "kwargs": data_handler_config,
                },
                "segments": {
                    "train": ("2020-09-14", "2020-11-30"),
                    "valid": ("2020-12-01", "2021-02-28"),
                    "test": ("2021-03-01", "2021-06-20"),
                },
            },
        },
    }

    # Model initiation.
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # Start an experiment to train the model.
    with R.start(experiment_name="train_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)  # fit the model
        R.save_objects(trained_model=model)
        rid = R.get_recorder().id

    ###################################
    # prediction, backtest & analysis
    ###################################
    port_analysis_config = {
        "executor": {
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {
                "time_per_step": "1min",
                "generate_portfolio_metrics": True,
            },
        },
        "strategy": {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {
                "model": model,
                "dataset": dataset,
                "topk": 50,
                "n_drop": 5,
            },
        },
        "backtest": {
            "start_time": "2020-09-14",
            "end_time": "2021-06-15",
            "account": 1000,
            "benchmark": benchmark,
            "exchange_kwargs": {
                "freq": "1min",
                "limit_threshold": 0.095,
                "deal_price": "close",
                "open_cost": 0.0005,
                "close_cost": 0.0015,
                "min_cost": 5,
            },
        },
    }

    # Backtest and analysis.
    with R.start(experiment_name="backtest_analysis"):
        recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
        model = recorder.load_object("trained_model")

        # Prediction.
        recorder = R.get_recorder()
        ba_rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # Backtest & analysis.
        par = PortAnaRecord(recorder, port_analysis_config, "1min")
        par.generate()

# Commented-out follow-up analysis kept from the original script (dead code,
# left for reference; the indentation inside assumes it once lived in the
# `backtest_analysis` block above).
'''
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")

    label_df = dataset.prepare("test", col_set="label")
    label_df.columns = ["label"]

    report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1min.pkl")
    positions = recorder.load_object("portfolio_analysis/positions_normal_1min.pkl")
    analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1min.pkl")

    analysis_position.report_graph(report_normal_df)
    analysis_position.risk_analysis_graph(analysis_df, report_normal_df)


    pred_df = recorder.load_object("pred.pkl")
    pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)

    print (f"label_df size:{len(label_df)}")
    print (f"pred_label size:{len(pred_label)}")
    #os._exit(0)
    ###analysis_position.score_ic_graph(pred_label)

    ###analysis_model.model_performance_graph(pred_label)

'''

image
image

image

all.txt

@DanielKui DanielKui added the question Further information is requested label Jul 10, 2024
@DanielKui DanielKui changed the title kernels get killed OOM when I running 1min data REG_CN kernels get killed OOM when running 1min data REG_CN Jul 10, 2024
@SunsetWolf
Copy link
Collaborator

We think it may be caused by the machine having too little memory; we tried your code but could not reproduce the problem. You can try shortening the training data, or try a different machine with more memory.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
question Further information is requested
Projects
None yet
Development

No branches or pull requests

2 participants