from datafeed.factor_utils import FactorUtils
import numpy as np
import pandas as pd
class FactorBuilder:
    """Evaluate textual factor formulas against a price DataFrame.

    The frame passed to the constructor must contain ``date`` and ``symbol``
    columns plus the base columns ``open``, ``high``, ``low``, ``close`` and
    ``volume``. Each base column is exposed to formulas as a Series indexed by
    ``(date, symbol)`` under its upper-case name (``OPEN``, ``CLOSE``, ...).

    NOTE: formulas are upper-cased before evaluation, so only upper-case
    context keys are reachable from an expression; the lower-case aliases and
    the ``np`` / ``pd`` entries are kept in the context for backward
    compatibility.
    """

    # Base columns exposed to formulas under their upper-case names.
    _BASE_COLUMNS = ('open', 'high', 'low', 'close', 'volume')
    # Level names every formula result is normalized to.
    _INDEX_NAMES = ['date', 'symbol']

    def __init__(self, df):
        """Build the evaluation context for ``df``.

        Args:
            df: DataFrame with ``date``, ``symbol`` and the base price/volume
                columns.

        Raises:
            KeyError: if one of the required columns is missing.
        """
        self.df = df
        context = {}
        # Register every public FactorUtils attribute under its own name and
        # under its upper-case alias.
        for method_name in dir(FactorUtils):
            if method_name.startswith('_'):
                continue
            method = getattr(FactorUtils, method_name)
            context[method_name] = method
            context[method_name.upper()] = method
        # Math helpers; these override FactorUtils entries with the same key.
        context.update({
            'LOG': np.log, 'EXP': np.exp, 'SQRT': np.sqrt, 'ABS': np.abs,
            'SIN': np.sin, 'COS': np.cos, 'TAN': np.tan, 'POWER': np.power,
            'SIGN': np.sign, 'MAX': np.maximum, 'MIN': np.minimum,
            'MEAN': np.mean, 'STD': np.std,
        })
        context['np'] = np
        context['pd'] = pd
        self.context = context
        self.update_base_factors()

    def update_base_factors(self):
        """(Re)load the base columns of ``self.df`` into the context.

        Each column becomes a Series indexed by ``(date, symbol)`` and is
        stored under the upper-case column name.
        """
        for column in self._BASE_COLUMNS:
            series = self.df[['date', 'symbol', column]].set_index(['date', 'symbol'])[column]
            self.context[column.upper()] = series

    def _base_index(self):
        """Return the ``(date, symbol)`` MultiIndex of ``self.df`` in row order."""
        return pd.MultiIndex.from_frame(self.df[self._INDEX_NAMES])

    def _align_to_base_index(self, result):
        """Place a result that lacks a MultiIndex onto the base index.

        A scalar is broadcast to every ``(date, symbol)`` row; a 1-D result
        with one value per row of ``self.df`` is aligned positionally.

        Raises:
            ValueError: if the result has any other shape, because it cannot
                be mapped onto ``(date, symbol)`` rows without guessing.
        """
        base_index = self._base_index()
        values = np.asarray(result)
        if values.ndim == 0:
            return pd.Series(values[()], index=base_index)
        if values.ndim == 1 and len(values) == len(base_index):
            return pd.Series(values, index=base_index)
        raise ValueError(
            f"Formula result with shape {values.shape} cannot be aligned to "
            f"the {len(base_index)} (date, symbol) rows of the input frame"
        )

    def calc_formula(self, expr: str):
        """Evaluate one formula and return it as a ``(date, symbol)`` Series.

        Args:
            expr: formula text. It is upper-cased before evaluation, so
                ``close`` and ``CLOSE`` refer to the same base factor.

        Returns:
            A Series named ``expr`` with a unique MultiIndex whose levels are
            named ``date`` and ``symbol``.

        Raises:
            Exception: any error raised while evaluating or normalizing the
                formula is re-raised after the callable context entries have
                been printed.
        """
        context = self.context
        try:
            # SECURITY: eval() runs arbitrary Python with full builtins;
            # formulas must come from a trusted source only.
            result = eval(expr.upper(), context)
            print(result)
            print(f"Result type: {type(result)}")
            has_multi_index = (
                isinstance(result, pd.Series)
                and isinstance(result.index, pd.MultiIndex)
            )
            if not has_multi_index:
                # Scalars/arrays (e.g. MEAN(close)) carry no (date, symbol)
                # labels; map them onto the frame's own index.
                result = self._align_to_base_index(result)
            if list(result.index.names) != self._INDEX_NAMES:
                # rename_axis returns a new object, so an index shared with a
                # Series stored in the context is never mutated in place.
                result = result.rename_axis(self._INDEX_NAMES)
            # Boolean indexing yields a new Series, so naming it below does
            # not rename a Series held in the context.
            result = result[~result.index.duplicated(keep='first')]
            result.name = expr
            return result
        except Exception as e:
            print(f"Formula execution error: {str(e)}")
            print("Functions available in context:")
            for key in sorted(context.keys()):
                if callable(context[key]):
                    print(f"- {key}")
            raise

    def calc_formulas(self, expr_list: list[str]):
        """Evaluate several formulas and join them with the input frame.

        Args:
            expr_list: formula texts, each evaluated with ``calc_formula``.

        Returns:
            A DataFrame indexed by ``date`` with a ``symbol`` column, one
            column per formula (named by its text) and the remaining columns
            of the input frame.
        """
        datas = [self.calc_formula(expr) for expr in expr_list]
        # set_index returns a new frame, so self.df is left untouched.
        base = self.df.set_index(['date', 'symbol'])
        # Formula results are de-duplicated on (date, symbol); do the same for
        # the base frame so the column-wise concat sees unique indexes.
        base = base[~base.index.duplicated(keep='first')]
        datas.append(base)
        combined = pd.concat(datas, axis=1)
        # Move the symbol level (level 1) back to a column, leaving date as index.
        return combined.reset_index(level=1)
if __name__ == '__main__':
    from datafeed.csv_dataloader import CsvDataLoader

    # Demo run: load two symbols and print the raw frame.
    df = CsvDataLoader().read_df(symbols=['510300.SH', '159915.SZ'])
    print(df)

    # Single formula: only the result type is printed here.
    result = FactorBuilder(df).calc_formula('ATR(close,high,low,20)')
    print(type(result))

    # Several formulas joined with the input frame.
    formulas = ['ATR(close,high,low,20)', 'roc100(close,20)', 'roc(close,20)']
    result = FactorBuilder(df).calc_formulas(formulas)
    print(type(result), result)

    # Pivot selected columns into a date x symbol table and print each one.
    for value_column in ('close', 'roc(close,20)', 'roc100(close,20)'):
        df_close = result.pivot_table(
            values=value_column, index=result.index, columns='symbol'
        )
        print(df_close)