microsoft · PaleNeutron · Jun 16, 2023 · Jun 16, 2023 · Jun 18, 2023 · Jun 18, 2023
diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py
@@ -13,16 +13,11 @@
 class SoftTopkStrategy(WeightStrategyBase):
     def __init__(
         self,
-        model,
-        dataset,
         topk,
         order_generator_cls_or_obj=OrderGenWInteract,
         max_sold_weight=1.0,
         risk_degree=0.95,
         buy_method="first_fill",
-        trade_exchange=None,
-        level_infra=None,
-        common_infra=None,
         **kwargs,
     ):
         """
@@ -37,7 +32,8 @@ def __init__(
                 average_fill: assign the weight to the stocks rank high averagely.
         """
         super(SoftTopkStrategy, self).__init__(
-            model, dataset, order_generator_cls_or_obj, trade_exchange, level_infra, common_infra, **kwargs
+            order_generator_cls_or_obj=order_generator_cls_or_obj,
+            **kwargs,
         )
         self.topk = topk
         self.max_sold_weight = max_sold_weight
@@ -89,13 +85,15 @@ def generate_target_weight_position(self, score, current, trade_start_time, trad
                         max(1 / self.topk - final_stock_weight.get(stock_id, 0), 0.0),
                         sold_stock_weight,
                     )
-                    final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + add_weight
+                    final_stock_weight[stock_id] = (
+                        final_stock_weight.get(stock_id, 0.0) + add_weight
+                    )
                     sold_stock_weight -= add_weight
             elif self.buy_method == "average_fill":
                 for stock_id in buy_signal_stocks:
-                    final_stock_weight[stock_id] = final_stock_weight.get(stock_id, 0.0) + sold_stock_weight / len(
-                        buy_signal_stocks
-                    )
+                    final_stock_weight[stock_id] = final_stock_weight.get(
+                        stock_id, 0.0
+                    ) + sold_stock_weight / len(buy_signal_stocks)
             else:
                 raise ValueError("Buy method not found")
         return final_stock_weight
diff --git a/qlib/contrib/strategy/signal_strategy.py b/qlib/contrib/strategy/signal_strategy.py
@@ -333,7 +333,7 @@ def generate_target_weight_position(self, score, current, trade_start_time, trad
 
         Parameters
         -----------
-        score : pd.Series
+        score : pd.DataFrame
             pred score for this trade date, index is stock_id, contain 'score' column.
         current : Position()
             current position.

diff --git a/qlib/data/pit.py b/qlib/data/pit.py
@@ -40,7 +40,7 @@ def _load_internal(self, instrument, start_index, end_index, freq):
                 s = self._load_feature(instrument, -start_ws, 0, cur_time)
                 resample_data[cur_index - start_index] = s.iloc[-1] if len(s) > 0 else np.nan
             except FileNotFoundError:
-                get_module_logger("base").warning(f"WARN: period data not found for {str(self)}")
+                get_module_logger("base").warning(f"WARN: period data not found for {instrument} {str(self)} ({freq})")
                 return pd.Series(dtype="float32", name=str(self))
 
         resample_series = pd.Series(

diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py
@@ -80,6 +80,7 @@ def __init__(self, freq: str, future: bool, provider_uri: dict = None, **kwargs)
         self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
         self.enable_read_cache = True  # TODO: make it configurable
         self.region = C["region"]
+        self.uri.parent.mkdir(parents=True, exist_ok=True)
 
     @property
     def file_name(self) -> str:
@@ -90,7 +91,7 @@ def _freq_file(self) -> str:
         """the freq to read from file"""
         if not hasattr(self, "_freq_file_cache"):
             freq = Freq(self.freq)
-            if freq not in self.support_freq:
+            if self.support_freq and freq not in self.support_freq:
                 # NOTE: uri
                 #   1. If `uri` does not exist
                 #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
@@ -200,6 +201,7 @@ def __init__(self, market: str, freq: str, provider_uri: dict = None, **kwargs):
         super(FileInstrumentStorage, self).__init__(market, freq, **kwargs)
         self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
         self.file_name = f"{market.lower()}.txt"
+        self.uri.parent.mkdir(parents=True, exist_ok=True)
 
     def _read_instrument(self) -> Dict[InstKT, InstVT]:
         if not self.uri.exists():
@@ -234,7 +236,6 @@ def _write_instrument(self, data: Dict[InstKT, InstVT] = None) -> None:
         df.loc[:, [self.SYMBOL_FIELD_NAME, self.INSTRUMENT_START_FIELD, self.INSTRUMENT_END_FIELD]].to_csv(
             self.uri, header=False, sep=self.INSTRUMENT_SEP, index=False
         )
-        df.to_csv(self.uri, sep="\t", encoding="utf-8", header=False, index=False)
 
     def clear(self) -> None:
         self._write_instrument(data={})
@@ -289,6 +290,7 @@ def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict =
         super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
         self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
         self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
+        self.uri.parent.mkdir(parents=True, exist_ok=True)
 
     def clear(self):
         with self.uri.open("wb") as _:
@@ -320,15 +322,15 @@ def write(self, data_array: Union[List, np.ndarray], index: int = None) -> None:
                 # rewrite
                 with self.uri.open("rb+") as fp:
                     _old_data = np.fromfile(fp, dtype="<f")
-                    _old_index = _old_data[0]
+                    _old_index = int(_old_data[0])
                     _old_df = pd.DataFrame(
                         _old_data[1:], index=range(_old_index, _old_index + len(_old_data) - 1), columns=["old"]
                     )
                     fp.seek(0)
                     _new_df = pd.DataFrame(data_array, index=range(index, index + len(data_array)), columns=["new"])
                     _df = pd.concat([_old_df, _new_df], sort=False, axis=1)
                     _df = _df.reindex(range(_df.index.min(), _df.index.max() + 1))
-                    _df["new"].fillna(_df["old"]).values.astype("<f").tofile(fp)
+                    np.hstack([_old_index, _df["new"].fillna(_df["old"]).values]).astype("<f").tofile(fp)
 
     @property
     def start_index(self) -> Union[int, None]: