Spaces:
Build error
Build error
File size: 5,954 Bytes
0a3ce35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import tabula
import yfinance as yfi
import sqlite3
import pandas as pd
import json
import talib
import time
import datetime
import warnings
warnings.filterwarnings("ignore")
with open("patterns.json", "r") as f:
patterns = json.load(f)
update_time = datetime.datetime.now()
# dummy update time
def find_patterns(df):
result = pd.DataFrame(
columns=[
"Date",
"Kode",
"Pattern",
"Pattern_Score",
"Open_Close_Change",
"High_Low_Change",
]
)
for attr, pattern in patterns.items():
scores = getattr(talib, attr)(df["Open"], df["High"], df["Low"], df["Close"])
mask = scores != 0
temp_result = df[mask]
if len(temp_result) > 0:
temp_result = temp_result.assign(
Open_Close_Change=(temp_result["Close"] - temp_result["Open"]) / temp_result["Open"],
High_Low_Change=(temp_result["High"] - temp_result["Low"]) / temp_result["Low"],
Pattern=[pattern] * len(temp_result),
Pattern_Score=scores[mask].values,
)[result.columns]
result = result.append(temp_result)
result = result.assign(time_updated = update_time)
return result
def pull_data_yfi():
start = time.time()
with sqlite3.connect("ihsg.db") as con:
tickers = pd.read_sql(
"""
SELECT Kode FROM list_perusahaan
WHERE Kode != "IHSG"
""",
con=con,
).values.flatten()
ihsg = (
yfi.download("^JKSE", start="2017-01-01", end="2023-01-10", progress=False)
.reset_index()
.dropna()
.assign(Kode="IHSG")
)
ihsg = ihsg[["Date", "Kode", "Open", "High", "Low", "Close", "Volume"]]
ihsg = ihsg.assign(time_updated = update_time)
ihsg.to_sql("historical", if_exists="replace", con=con, index=False)
pattern_search = find_patterns(ihsg)
pattern_search.to_sql("patterns", if_exists="replace", con=con, index=False)
print("INSERTION RESULT: \n")
print(pd.read_sql("SELECT * FROM historical", con=con).tail(10))
print(pd.read_sql("SELECT * FROM historical", con=con).shape)
print("\n\n*--\n")
for i in range(0, len(tickers), 50):
ticker = [f"{kode}.JK" for kode in tickers[i : i + 50]]
df = (
yfi.download(ticker, start="2017-01-01", end="2023-01-10", progress=False)
.T.unstack(level=1)
.T.reset_index()
.dropna()
.rename(columns={"level_1": "Kode"})
)
df = df[["Date", "Kode", "Open", "High", "Low", "Close", "Volume"]]
df["Kode"] = df["Kode"].str.replace(".JK", "")
for j, kode in enumerate(df["Kode"].unique()):
print(f"Finding Patterns for {kode} #{i+j+1}\t\t time elapsed = {time.time() - start:.2f} s")
pattern_search = find_patterns(df[df["Kode"] == kode])
pattern_search.to_sql("patterns", if_exists="append", con=con, index=False)
df = df.assign(time_updated = update_time)
df.to_sql("historical", if_exists="append", con=con, index=False)
print("INSERTION RESULT: \n")
print(pd.read_sql("SELECT * FROM historical", con=con).tail(10))
print(pd.read_sql("SELECT * FROM historical", con=con).shape)
print("\n\n*--\n")
time.sleep(60)
con.commit()
def pull_data_klasifikasi_industri():
with sqlite3.connect("ihsg.db") as con:
cur = con.cursor()
cur.execute("DROP TABLE IF EXISTS list_perusahaan")
cur.execute("""
CREATE TABLE list_perusahaan (
Kode VARCHAR(4),
Nama TEXT,
Sektor TEXT,
Instrumen TEXT)
""")
cur.execute("""
INSERT INTO list_perusahaan VALUES
('IHSG', 'Indeks Harga Saham Gabungan', NULL, 'Indeks')
""")
# TODO: Change Schema from Star Schema to Snowflake Schema
# list_perusahaan table will be the dimension table for sector and sub-sector fact tables
# note: list_perusahaan table is a dimension table for historical fact table
dfs = tabula.read_pdf("Klasifikasi Industri Perusahaan Tercatat.pdf", pages="all", stream=True)
# print(len(dfs))
for df in dfs:
kode, nama, sektor = None, None, None
for row in df.iloc[2:,:].itertuples():
if kode is not None and pd.notna(row[2]):
cur.execute(f"""
INSERT INTO list_perusahaan VALUES
('{kode}', '{nama}', '{sektor}', 'Saham')
""")
kode, nama, sektor = None, None, None
elif kode is not None and pd.isna(row[2]):
if pd.notna(row[3]):
nama += " " + row[3]
if pd.notna(row[5]):
sektor += " " + row[5]
if kode is None and nama is None and sektor is None and pd.notna(row[2]):
if "saham" in row[8].lower():
kode = row[2]
nama = row[3]
sektor = row[5]
else:
if kode is not None:
cur.execute(f"""
INSERT INTO list_perusahaan VALUES
('{kode}', '{nama}', '{sektor}', 'Saham')
""")
print("INSERTION RESULT: \n")
print(pd.read_sql("SELECT * FROM list_perusahaan", con=con).tail(10))
print(pd.read_sql("SELECT * FROM list_perusahaan", con=con).shape)
print("\n\n*--\n")
con.commit()
if __name__ == "__main__":
pull_data_klasifikasi_industri()
pull_data_yfi()
|