File size: 3,676 Bytes
74bc39f
 
 
 
 
 
 
500d236
74bc39f
 
 
 
 
 
 
 
 
 
 
 
 
 
500d236
74bc39f
 
 
 
 
 
 
 
 
 
 
 
500d236
74bc39f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500d236
74bc39f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# space/tools/ts_preprocess.py
import pandas as pd
import numpy as np
from typing import List

MONTH = "MS"  # month-start frequency


def _emi(principal: float, annual_rate: float, n_months: int) -> float:
    """
    EMI formula with monthly compounding.
    r_m = annual_rate / 12
    EMI = P * r_m * (1+r_m)^n / ((1+r_m)^n - 1)
    """
    if n_months <= 0 or principal <= 0:
        return 0.0
    r = annual_rate / 12.0
    if r <= 0:
        return principal / n_months
    fac = (1.0 + r) ** n_months
    return principal * r * fac / (fac - 1.0)


def _project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
    """
    Monthly path for a deposit. Value compounds monthly.
    """
    r = annual_rate / 12.0
    data = []
    bal = principal
    for m in range(months + 1):
        data.append({"step": m, "portfolio_value": bal})
        bal = bal * (1.0 + r)
    return pd.DataFrame(data)


def _project_asset(principal: float, annual_rate: float, tenor_months: int) -> pd.DataFrame:
    """
    Monthly amortization schedule for an asset/loan using EMI.
    """
    emi = _emi(principal, annual_rate, tenor_months)
    r = annual_rate / 12.0
    data = []
    bal = principal
    for m in range(tenor_months + 1):
        interest = bal * r
        principal_pay = max(0.0, emi - interest)
        next_bal = max(0.0, bal - principal_pay)
        data.append({
            "step": m,
            "portfolio_value": bal,
            "emi": emi,
            "interest_component": interest,
            "principal_component": principal_pay,
            "remaining_balance": next_bal
        })
        bal = next_bal
    return pd.DataFrame(data)


def build_timeseries(df: pd.DataFrame) -> pd.DataFrame:
    """
    Input df columns (example):
      - portfolio_date (datetime or str)
      - instrument_type: 'Deposit' or 'Asset'
      - balance: float
      - interest_rate: annual rate (e.g., 0.12)
      - time_to_maturity: months (int)
      - tenor_months: months (for Assets; if missing, fallback to time_to_maturity)
    Output:
      Long time-series with monthly timestamps, projected 'portfolio_value'
      (and EMI breakdown for Assets).
    """
    df = df.copy()
    if "timestamp" not in df.columns:
        df["timestamp"] = pd.to_datetime(df["portfolio_date"])

    out_frames: List[pd.DataFrame] = []
    for _, row in df.iterrows():
        itype = str(row.get("instrument_type", "")).strip().lower()
        start = pd.to_datetime(row["timestamp"])
        months = int(row.get("time_to_maturity", 0) or 0)
        principal = float(row.get("balance", 0.0) or 0.0)
        annual_rate = float(row.get("interest_rate", 0.0) or 0.0)

        if itype == "deposit":
            sched = _project_deposit(principal, annual_rate, months)
        elif itype == "asset":
            tenor = int(row.get("tenor_months", months) or months or 0)
            sched = _project_asset(principal, annual_rate, tenor)
        else:
            # unknown types: keep flat
            sched = pd.DataFrame({"step": range(months + 1), "portfolio_value": principal})

        # Build timestamps: month-start frequency
        sched["timestamp"] = pd.date_range(start=start, periods=len(sched), freq=MONTH)
        # Carry identifiers
        for col in ["instrument_type", "interest_rate"]:
            if col in df.columns:
                sched[col] = row.get(col)
        sched["origin_portfolio_date"] = start
        sched["origin_balance"] = principal

        out_frames.append(sched)

    ts = pd.concat(out_frames, ignore_index=True)
    ts = ts.sort_values(["timestamp", "instrument_type"]).reset_index(drop=True)
    return ts