import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.stats import shapiro
from scipy.stats import spearmanr
from scipy.stats import pearsonr

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from datetime import datetime, timedelta

from io import StringIO

from math import radians, cos, sin, asin, sqrt

df1 = pd.read_csv('Downloads/Metro Manila Monthly Emisions - Metro Manila Monthly CO2 Emissions from Vehicle.csv')
co2_df = df1[['source_name','start_time','emissions_quantity']]

co2_df.head()

df2 = pd.read_csv('Downloads/temps.csv')
temp_df = df2[['location_id', 'time','apparent_temperature_max (°C)']]

temp_df.head()

metadata = pd.read_csv('Downloads/metadata.csv')

metadata.head()

def sum(values):
    s = 0
    for val in values:
        s += val
    return s

co2_df['start_time'] = co2_df['start_time'].astype(str) 

start_date = datetime.strptime(co2_df.loc[0, 'start_time'][:10], "%d/%m/%Y")
end_date = datetime.strptime('01/12/2024', "%d/%m/%Y")

co2_df['start_time'] = pd.to_datetime(co2_df['start_time'].str[:10], format='%d/%m/%Y')

data = []

while start_date <= end_date:
    data.append({'time': start_date, 'emissions': sum(co2_df.loc[co2_df['start_time']==start_date, 'emissions_quantity'])})

    start_date = start_date + pd.DateOffset(months=1)

co2 = pd.DataFrame(data)

co2.head(len(co2))

C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\1027906301.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  co2_df['start_time'] = co2_df['start_time'].astype(str)
C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\1027906301.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  co2_df['start_time'] = pd.to_datetime(co2_df['start_time'].str[:10], format='%d/%m/%Y')

co2['time'] = pd.to_datetime(co2['time'], format="%Y-%m-%d")

plt.figure(figsize=(10, 6))
plt.plot(co2['time'], co2['emissions'], color="#2e292b")#f0d7cc, #244C33, #2e292b
plt.title('CO2 Emissions in Metro Manila')
plt.xlabel('Time')
plt.ylabel('CO2 (in tons)')

# Improve x-axis formatting for dates
plt.gcf().autofmt_xdate()  # Auto-rotate date labels

plt.tight_layout()
plt.show()

def get_distance(lat1, lon1, lat2, lon2): #computes the distance between two regions using latitude and longitude
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))

    # Radius of Earth in kilometers (use 6371 for km or 3956 for miles)
    r = 6371
    return c * r

def normalize_distance(metadata): # Returns a list of normalized distance for regions idx 1-6
  metro_manila_lat = metadata.loc[0, 'latitude']
  metro_manila_lon = metadata.loc[0, 'longitude']

  distance = []
  for i in range(1, 7):
    region_lat = metadata.loc[i, 'latitude']
    region_lon = metadata.loc[i, 'longitude']

    distance.append(get_distance(metro_manila_lat, metro_manila_lon, region_lat, region_lon))

  norm_distance = []
  for d in distance:
    norm_distance.append(max(distance) - d / max(distance) - min(distance))

  return norm_distance

def normalize_elevation(metadata):
  metro_manila_elev = metadata.loc[0, 'elevation']

  elevation = []
  for i in range(1, 7):
    region_elev = metadata.loc[i, 'elevation']
    elevation.append(abs(region_elev - metro_manila_elev))

  norm_elevation = []
  for e in elevation:
    norm_elevation.append(max(elevation) - e / max(elevation) - min(elevation))

  return norm_elevation

def weighted_avg(distance, elevation, data):
  sum = 0
  sum_weight = 0

  for i in range(6):
    tot_weight = distance[i]*0.6 + elevation[i]*0.4

    sum_weight += tot_weight

    sum += data[i]*tot_weight

  return sum/sum_weight

start_date = datetime.strptime(temp_df.loc[0, 'time'], "%Y-%m-%d")
end_date = datetime.strptime('2024-12-31', "%Y-%m-%d")

print(start_date)
print(end_date)

2015-01-01 00:00:00
2024-12-31 00:00:00

distance = normalize_distance(metadata)
elevation = normalize_elevation(metadata)

rural_data = []

temp_df['time'] = pd.to_datetime(temp_df['time'], format="%Y-%m-%d")


while start_date <= end_date:
    values = temp_df.loc[temp_df['time'] == start_date, 'apparent_temperature_max (°C)'].tolist()
    
    avg = {'time':start_date, 'temp': weighted_avg(distance, elevation, values[1:])}
    rural_data.append(avg)

    start_date = start_date + pd.DateOffset(days=1)

rural_df = pd.DataFrame(rural_data)

rural_df.head()

C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\517857554.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df['time'] = pd.to_datetime(temp_df['time'], format="%Y-%m-%d")

urban_df = temp_df.loc[temp_df['location_id'] == 0, ['time', 'apparent_temperature_max (°C)']]
urban_df = urban_df[urban_df['time'] != pd.to_datetime('2025-01-01')]
urban_df.head()

plt.figure(figsize=(10, 5))
plt.plot(rural_df['time'], rural_df['temp'], label='Rural Regions', marker='o')
plt.plot(urban_df['time'], urban_df['apparent_temperature_max (°C)'], label='Metro Manila', marker='s')

plt.xlabel('Days')
plt.ylabel('Temperature (°C)')
plt.title('Temperature Trends at Metro Manila and Rural Regions Around It')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

UHI_days = pd.DataFrame({'days' : rural_df['time'], 'uhi_intensity' : urban_df['apparent_temperature_max (°C)'] - rural_df['temp']})

UHI_days.head()

plt.plot(UHI_days['days'], UHI_days['uhi_intensity'])
plt.title('UHI Intensity in Metro Manila')
plt.xlabel('Days')
plt.ylabel('UHI Intensity')
plt.tight_layout()
plt.show()

uhi = UHI_days.set_index('days').resample('MS').mean().reset_index()
uhi = uhi.rename(columns={'days': 'time'})  # Now 'days' is a column again
uhi.head(len(uhi))

plt.figure(figsize=(10, 6))
plt.plot(uhi['time'], uhi['uhi_intensity'])
plt.title('UHI Intensity in Metro Manila')
plt.xlabel('Time')
plt.ylabel('UHI Intensity')

# Improve x-axis formatting for dates
plt.gcf().autofmt_xdate()  # Auto-rotate date labels

plt.tight_layout()
plt.show()

sns.histplot(co2['emissions'], kde=True)
plt.title('CO2 Vehicle Emissions in Metro Manila')
plt.show()

sns.histplot(uhi['uhi_intensity'], kde=True)
plt.title('UHI Intensity in Metro Manila')
plt.show()

uhi['month'] = uhi['time'].dt.month
uhi['season'] = uhi['month'].apply(lambda m: 'Dry' if m in [1,2,3,4,5,11,12] else 'Wet')

uhi.head(len(uhi))

uhi_wet = uhi.loc[uhi['season'] == 'Wet']

sns.histplot(uhi_wet['uhi_intensity'], kde=True)
plt.title('UHI Intensity in Metro Manila (Wet Season)')
plt.show()

uhi_dry = uhi.loc[uhi['season'] == 'Dry']

sns.histplot(uhi_dry['uhi_intensity'], kde=True)
plt.title('UHI Intensity in Metro Manila (Dry Season)')
plt.show()

wet = uhi.loc[uhi['season'] == 'Wet', 'uhi_intensity']
dry = uhi.loc[uhi['season'] == 'Dry', 'uhi_intensity']

print("Wet:", shapiro(wet))
print("Dry:", shapiro(dry))

Wet: ShapiroResult(statistic=np.float64(0.9855929347076213), pvalue=np.float64(0.7967748536985817))
Dry: ShapiroResult(statistic=np.float64(0.8847006381621425), pvalue=np.float64(1.0152257530121138e-05))

find_outliers = uhi_dry.loc[uhi_dry['uhi_intensity'] < 3, ['time', 'month']]

find_outliers.head(len(find_outliers))

uhi = uhi.loc[uhi['time'] >= pd.to_datetime('2021-01-01')].reset_index()

uhi.head(len(uhi))

uhi['season'] = uhi['month'].apply(lambda m: 'Dry' if m in [1,2,3,4,11,12] else 'Wet')

uhi_dry = uhi.loc[uhi['season'] == 'Dry']

sns.histplot(uhi_dry['uhi_intensity'], kde=True)
plt.title('UHI Intensity in Metro Manila (Dry Season)')
plt.show()

uhi_wet = uhi.loc[uhi['season'] == 'Wet']

sns.histplot(uhi_wet['uhi_intensity'], kde=True)
plt.title('UHI Intensity in Metro Manila (Wet Season)')
plt.show()

wet = uhi.loc[uhi['season'] == 'Wet', 'uhi_intensity']
dry = uhi.loc[uhi['season'] == 'Dry', 'uhi_intensity']

print("Wet:", shapiro(wet))
print("Dry:", shapiro(dry))

Wet: ShapiroResult(statistic=np.float64(0.9503512024972561), pvalue=np.float64(0.2757285588204675))
Dry: ShapiroResult(statistic=np.float64(0.9474607387350519), pvalue=np.float64(0.2385556747288774))

uhi = pd.merge(co2, uhi, on='time')

uhi.head(len(uhi))

for group in uhi['season'].unique():
    sub = uhi[uhi['season'] == group]
    rho, pval = spearmanr(sub['emissions'], sub['uhi_intensity'])
    print(f"{group} -> Spearman r = {rho:.3f}, p = {pval:.3f}")

Dry -> Spearman r = -0.228, p = 0.283
Wet -> Spearman r = 0.309, p = 0.141

for group in uhi['season'].unique():
    sub = uhi[uhi['season'] == group]
    rho, pval = pearsonr(sub['emissions'], sub['uhi_intensity'])
    print(f"{group} -> Pearson r = {rho:.3f}, p = {pval:.3f}")

Dry -> Pearson r = -0.148, p = 0.489
Wet -> Pearson r = 0.306, p = 0.146

plt.figure(figsize=(10, 6))
sns.lmplot(data=uhi.loc[uhi['season'] == 'Wet'], x='emissions', y='uhi_intensity', hue='season')
plt.title('CO2 Vehicle Emission vs. UHI Intensity in Metro Manila During Wet Season')
plt.show()

<Figure size 1000x600 with 0 Axes>

plt.figure(figsize=(10, 6))
sns.lmplot(data=uhi.loc[uhi['season'] == 'Dry'], x='emissions', y='uhi_intensity', hue='season')
plt.title('CO2 Vehicle Emission vs. UHI Intensity in Metro Manila During Dry Season')
plt.show()

<Figure size 1000x600 with 0 Axes>

uhi_wet = uhi.loc[uhi['season'] == 'Wet']

fig, ax1 = plt.subplots(figsize=(14,6))

ax1.set_xlabel('time')
ax1.set_ylabel('Emissions (tons)', color='tab:red')
ax1.plot(uhi_wet['time'], uhi_wet['emissions'], color='tab:red', label='Emissions')
ax1.tick_params(axis='y', labelcolor='tab:red')

ax2 = ax1.twinx()
ax2.set_ylabel('UHI Intensity (Celsius)', color='tab:blue')
ax2.plot(uhi_wet['time'], uhi_wet['uhi_intensity'], color='tab:blue', label='UHI')
ax2.tick_params(axis='y', labelcolor='tab:blue')

plt.title('Emissions and UHI Intensity in Metro Manila During Wet Season')
fig.tight_layout()
plt.show()

uhi_dry = uhi.loc[uhi['season'] == 'Dry']

fig, ax1 = plt.subplots(figsize=(14,6))

ax1.set_xlabel('time')
ax1.set_ylabel('Emissions (tons)', color='tab:red')
ax1.plot(uhi_dry['time'], uhi_dry['emissions'], color='tab:red', label='Emissions')
ax1.tick_params(axis='y', labelcolor='tab:red')

ax2 = ax1.twinx()
ax2.set_ylabel('UHI Intensity (Celsius)', color='tab:blue')
ax2.plot(uhi_dry['time'], uhi_dry['uhi_intensity'], color='tab:blue', label='UHI')
ax2.tick_params(axis='y', labelcolor='tab:blue')

plt.title('Emissions and UHI Intensity in Metro Manila During Dry Season')
fig.tight_layout()
plt.show()

# Create lagged emissions (e.g., 1 to 3 months)
uhi_dry['emissions_lag1'] = uhi_dry['emissions'].shift(1)
uhi_dry['emissions_lag2'] = uhi_dry['emissions'].shift(2)
uhi_dry['emissions_lag3'] = uhi_dry['emissions'].shift(3)

for lag in ['emissions_lag1', 'emissions_lag2', 'emissions_lag3']:
    valid_data = uhi_dry[[lag, 'uhi_intensity']].dropna()
    r, p = pearsonr(valid_data[lag], valid_data['uhi_intensity'])
    print(f"Lag: {lag} → r = {r:.2f}, p = {p:.3f}")

Lag: emissions_lag1 → r = -0.32, p = 0.136
Lag: emissions_lag2 → r = 0.31, p = 0.154
Lag: emissions_lag3 → r = 0.18, p = 0.427

C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\4145901415.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uhi_dry['emissions_lag1'] = uhi_dry['emissions'].shift(1)
C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\4145901415.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uhi_dry['emissions_lag2'] = uhi_dry['emissions'].shift(2)
C:\Users\emilm\AppData\Local\Temp\ipykernel_5064\4145901415.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uhi_dry['emissions_lag3'] = uhi_dry['emissions'].shift(3)

fig, ax1 = plt.subplots(figsize=(14,6))

ax1.set_xlabel('time')
ax1.set_ylabel('Emissions (tons)', color='tab:red')
ax1.plot(uhi_dry['time'], uhi_dry['emissions_lag2'], color='tab:red', label='Emissions (lag 2)')
ax1.tick_params(axis='y', labelcolor='tab:red')

ax2 = ax1.twinx()
ax2.set_ylabel('UHI Intensity (Celsius)', color='tab:blue')
ax2.plot(uhi_dry['time'], uhi_dry['uhi_intensity'], color='tab:blue', label='UHI')
ax2.tick_params(axis='y', labelcolor='tab:blue')

plt.title('Emissions and UHI Intensity in Metro Manila During Dry Season Lagged by 2 Months')
fig.tight_layout()
plt.show()

# train model 1 Wet Model

uhi_wet = uhi.loc[uhi['season'] == 'Wet', ['time', 'season', 'emissions', 'uhi_intensity']].reset_index()

X_w = uhi_wet[['emissions']]
Y_w = uhi_wet['uhi_intensity']

split_idx = int(len(uhi_wet) * 0.8)

X_train, X_test = X_w.iloc[:split_idx], X_w.iloc[split_idx:]
y_train, y_test = Y_w.iloc[:split_idx], Y_w.iloc[split_idx:]

wet_model = LinearRegression()
wet_model.fit(X_train, y_train)

y_pred = wet_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse:.2f}, R²: {r2:.2f}")

MSE: 0.44, R²: -0.05

# train model 2 Dry Model

uhi_dry = uhi.loc[uhi['season'] == 'Dry', ['time', 'season', 'emissions', 'uhi_intensity']].reset_index()
uhi_dry['emissions_lag2'] = uhi_dry['emissions'].shift(2)
valid_data = uhi_dry[['emissions_lag2', 'uhi_intensity']].dropna()

X_w = valid_data[['emissions_lag2']]
Y_w = valid_data['uhi_intensity']

split_idx = int(len(valid_data) * 0.8)

X_train, X_test = X_w.iloc[:split_idx], X_w.iloc[split_idx:]
y_train, y_test = Y_w.iloc[:split_idx], Y_w.iloc[split_idx:]

dry_model = LinearRegression()
dry_model.fit(X_train, y_train)

y_pred = dry_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse:.2f}, R²: {r2:.2f}")

MSE: 0.45, R²: 0.02

sns.regplot(x='emissions', y='uhi_intensity', data=uhi_wet)
plt.title("Check Linearity")
plt.show()

sns.regplot(x='emissions_lag2', y='uhi_intensity', data=valid_data)
plt.title("Check Linearity")
plt.show()

def seasonal_model(df):
    y_pred = np.full(len(df), np.nan)

    # Wet season
    wet_mask = df['season'] == 'Wet'
    if wet_mask.any():
        y_pred[wet_mask] = wet_model.predict(df.loc[wet_mask, ['emissions']])

    dry_mask = df['season'] == 'Dry'
    valid_dry = dry_mask & df['emissions_lag2'].notnull()
    if valid_dry.any():
        y_pred[valid_dry] = dry_model.predict(df.loc[valid_dry, ['emissions_lag2']])

    return y_pred

uhi['emissions_lag2'] = uhi['emissions'].shift(2)

uhi['uhi_predicted'] = seasonal_model(uhi)

uhi.head()

plt.figure(figsize=(14, 6))
plt.plot(uhi['time'], uhi['uhi_intensity'], label='Actual UHI Intensity', color='blue', linewidth=2)
plt.plot(uhi['time'], uhi['uhi_predicted'], label='Predicted UHI Intensity', color='red', linestyle='--', linewidth=2)

plt.title('Actual vs Predicted UHI Intensity Over Time')
plt.xlabel('Time')
plt.ylabel('UHI Intensity (°C)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

filtered = uhi[uhi['uhi_predicted'].notna()]
mse = mean_squared_error(filtered['uhi_intensity'],filtered['uhi_predicted'])
r2 = r2_score(filtered['uhi_intensity'], filtered['uhi_predicted'])

print(f"MSE: {mse:.2f}, R²: {r2:.2f}")

MSE: 0.57, R²: 0.80

	source_name	start_time	emissions_quantity
0	Kalookan City	01/01/2021 0:00	8273.046
1	Kalookan City	01/02/2021 0:00	7430.703
2	Kalookan City	01/03/2021 0:00	8501.110
3	Kalookan City	01/04/2021 0:00	8277.595
4	Kalookan City	01/05/2021 0:00	8713.652

	time	apparent_temperature_max (°C)
0	2015-01-01	24.6
1	2015-01-02	25.0
2	2015-01-03	27.0
3	2015-01-04	29.6
4	2015-01-05	31.6

	location_id	latitude	longitude	elevation	utc_offset_seconds	timezone	timezone_abbreviation
0	0	14.586995	121.002785	6.0	28800	Asia/Singapore	GMT+8
1	1	15.008787	121.176476	451.0	28800	Asia/Singapore	GMT+8
2	2	14.586995	121.337040	340.0	28800	Asia/Singapore	GMT+8
3	3	14.235501	121.857666	0.0	28800	Asia/Singapore	GMT+8
4	4	14.657293	121.449814	409.0	28800	Asia/Singapore	GMT+8

	time	emissions
0	2021-01-01	377670.22149
1	2021-02-01	339525.14791
2	2021-03-01	387995.25552
3	2021-04-01	377475.05045
4	2021-05-01	396570.43498
5	2021-06-01	379921.14372
6	2021-07-01	392807.14084
7	2021-08-01	390733.51388
8	2021-09-01	379318.16057
9	2021-10-01	392591.88060
10	2021-11-01	381343.78644
11	2021-12-01	386515.27786
12	2022-01-01	381604.33461
13	2022-02-01	353575.80509
14	2022-03-01	400996.92616
15	2022-04-01	391602.20090
16	2022-05-01	408975.94898
17	2022-06-01	394134.70433
18	2022-07-01	404456.35178
19	2022-08-01	397526.56620
20	2022-09-01	383068.23510
21	2022-10-01	391313.28340
22	2022-11-01	377836.88060
23	2022-12-01	378581.76460
24	2023-01-01	368564.99500
25	2023-02-01	337600.68730
26	2023-03-01	378488.24370
27	2023-04-01	365214.93230
28	2023-05-01	376824.74200
29	2023-06-01	358761.45820
30	2023-07-01	367646.73790
31	2023-08-01	365413.87780
32	2023-09-01	356072.74510
33	2023-10-01	367862.70530
34	2023-11-01	359216.45320
35	2023-12-01	364077.70730
36	2024-01-01	358601.57630
37	2024-02-01	344006.02104
38	2024-03-01	376537.27276
39	2024-04-01	367546.37059
40	2024-05-01	383132.59850
41	2024-06-01	367984.67010
42	2024-07-01	378345.58970
43	2024-08-01	374929.52700
44	2024-09-01	356072.74510
45	2024-10-01	367862.70530
46	2024-11-01	359216.45320
47	2024-12-01	364077.70730

	time	temp
0	2015-01-01	22.700370
1	2015-01-02	22.300317
2	2015-01-03	24.700380
3	2015-01-04	26.766945
4	2015-01-05	28.416957

Updated Exploratory Data Analysis¶

Data Preprocessing¶

Machine Learning Model¶

Conclusion¶

Call To Action¶

	days	uhi_intensity
0	2015-01-01	1.899630
1	2015-01-02	2.699683
2	2015-01-03	2.299620
3	2015-01-04	2.833055
4	2015-01-05	3.183043

	time	uhi_intensity
0	2015-01-01	2.302392
1	2015-02-01	2.110446
2	2015-03-01	2.540628
3	2015-04-01	2.765333
4	2015-05-01	1.565850
...	...	...
115	2024-08-01	2.129909
116	2024-09-01	2.234727
117	2024-10-01	3.851909
118	2024-11-01	5.033693
119	2024-12-01	6.350254

	index	time	uhi_intensity	month	season
0	72	2021-01-01	5.631497	1	Dry
1	73	2021-02-01	5.513525	2	Dry
2	74	2021-03-01	5.515976	3	Dry
3	75	2021-04-01	4.908226	4	Dry
4	76	2021-05-01	4.619258	5	Dry
5	77	2021-06-01	2.083114	6	Wet
6	78	2021-07-01	2.198122	7	Wet
7	79	2021-08-01	2.255679	8	Wet
8	80	2021-09-01	3.275938	9	Wet
9	81	2021-10-01	4.454611	10	Wet
10	82	2021-11-01	5.915847	11	Dry
11	83	2021-12-01	6.502946	12	Dry
12	84	2022-01-01	5.414303	1	Dry
13	85	2022-02-01	6.689056	2	Dry
14	86	2022-03-01	6.306831	3	Dry
15	87	2022-04-01	5.870384	4	Dry
16	88	2022-05-01	3.495092	5	Dry
17	89	2022-06-01	3.297120	6	Wet
18	90	2022-07-01	2.904144	7	Wet
19	91	2022-08-01	2.400901	8	Wet
20	92	2022-09-01	1.804772	9	Wet
21	93	2022-10-01	4.174536	10	Wet
22	94	2022-11-01	5.530873	11	Dry
23	95	2022-12-01	5.826043	12	Dry
24	96	2023-01-01	6.559921	1	Dry
25	97	2023-02-01	6.088468	2	Dry
26	98	2023-03-01	6.444481	3	Dry
27	99	2023-04-01	5.060991	4	Dry
28	100	2023-05-01	3.771385	5	Dry
29	101	2023-06-01	2.365374	6	Wet
30	102	2023-07-01	2.540081	7	Wet
31	103	2023-08-01	1.279245	8	Wet
32	104	2023-09-01	2.255993	9	Wet
33	105	2023-10-01	4.101384	10	Wet
34	106	2023-11-01	6.643614	11	Dry
35	107	2023-12-01	6.240068	12	Dry
36	108	2024-01-01	6.912077	1	Dry
37	109	2024-02-01	6.623362	2	Dry
38	110	2024-03-01	6.069189	3	Dry
39	111	2024-04-01	4.986055	4	Dry
40	112	2024-05-01	4.732681	5	Dry
41	113	2024-06-01	3.254375	6	Wet
42	114	2024-07-01	3.138519	7	Wet
43	115	2024-08-01	2.129909	8	Wet
44	116	2024-09-01	2.234727	9	Wet
45	117	2024-10-01	3.851909	10	Wet
46	118	2024-11-01	5.033693	11	Dry
47	119	2024-12-01	6.350254	12	Dry

	time	month
0	2015-01-01	1
1	2015-02-01	2
2	2015-03-01	3
3	2015-04-01	4
4	2015-05-01	5
10	2015-11-01	11
11	2015-12-01	12
12	2016-01-01	1
13	2016-02-01	2
14	2016-03-01	3
15	2016-04-01	4
16	2016-05-01	5
22	2016-11-01	11
23	2016-12-01	12
52	2019-05-01	5