The January 15, 2022 volcanic eruption occurred near Tonga in the South Pacific. CDIP's Scripps Pier pressure sensor recorded a clear tsunami signal arriving shortly before 16:00 UTC (08:00 PST) on January 15. Preliminary analysis shows a peak amplitude of over 20cm.
Pressure sensor data from CDIP's 073 - Scripps Pier La Jolla, CA can be manually downloaded at https://cdip.ucsd.edu/themes/?pb=1&d2=p70&u2=s:073:st:1:v:download_other In this example we're using multiple 1 hour disk farm (df) records, which can also be found individually at https://cdip.ucsd.edu/data01/PUBLIC_DATA/dsk_2022/073/jan/02/
# One-hour disk farm (df) records for 2022-01-15, hours 11:20 through 20:20 UTC.
# Name layout: 'df' + station (073) + stream (02) + YYYYMMDDHHMM.
df_files = [f'df0730220220115{hour:02d}20' for hour in range(11, 21)]
# Notebook created using jupyterlab=3.2.1, python=3.9.7, pandas=1.3.5, matplotlib=3.5.0
import pandas as pd
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy as np
def df_date(df_file):
    """
    Parse the timestamp embedded in a df file name.

    Characters 7 onward of the name encode the record start time as
    YYYYMMDDHHMM, e.g. 'df07302202201151120' -> 2022-01-15 11:20.

    Returns a pandas Timestamp.
    """
    encoded_time = df_file[7:20]
    return pd.to_datetime(encoded_time)
def get_url(df_file):
    """
    Build the CDIP public-data URL where a df file can be downloaded.

    Station, stream and record date are all decoded from the file name
    itself ('df' + station + stream + YYYYMMDDHHMM), e.g.
    'df07302202201151120' -> station 073, stream 02, 2022-01-15 11:20.
    """
    station = df_file[2:5]
    stream = df_file[5:7]
    record_start = pd.to_datetime(df_file[7:20])
    # Directory layout uses a lowercase three-letter month name, e.g. 'jan'.
    month_dir = record_start.month_name()[:3].lower()
    return (f'https://cdip.ucsd.edu/data01/PUBLIC_DATA/'
            f'dsk_{record_start.year}/{station}/{month_dir}/{stream}/{df_file}')
def parse_df(df_files):
    """
    Download and parse df files into a single flat list of samples.

    Each file is whitespace-delimited with 8 samples per line; the first
    25 lines of sensor metadata are skipped.
    Note: Header size may vary for different sensors.

    Parameters
    ----------
    df_files : list of str
        df file names understood by get_url().

    Returns
    -------
    list
        All samples concatenated in time order.
    """
    data = []
    for df_file in df_files:
        _df = read_csv(get_url(df_file),
                       skiprows=25,
                       names=['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8'],
                       # Raw string: '\s+' in a plain literal is an invalid
                       # escape sequence (SyntaxWarning on modern Python).
                       delimiter=r'\s+')
        # Flatten row-major (C order) so samples stay in time order.
        # Much faster than the equivalent iterrows() loop over every cell.
        data.extend(_df.to_numpy().ravel())
    return data
# Fetch all records and assemble them into a single-column DataFrame.
water_column = parse_df(df_files)
start_date = df_date(df_files[0])
# Samples arrive at 1 Hz, so index them at a 1-second cadence starting
# from the first record's timestamp.
sample_times = pd.date_range(start=start_date, periods=len(water_column),
                             freq='s', name='Date')
df = pd.DataFrame({'Water column (cm)': water_column}, index=sample_times)
# Quality control: flag the sensor's -9999.9 fill value as missing, then
# patch gaps of at most 2 consecutive samples by linear interpolation.
df = df.replace(-9999.9, np.nan)
df = df.interpolate(limit=2)
# 1-minute centered rolling mean smooths out short-period wave noise.
df['rolling mean 60s'] = df['Water column (cm)'].rolling(60, center=True).mean()
# A 1-hour centered rolling mean serves as the tide estimate.
window = 3600
df['tide'] = df['Water column (cm)'].rolling(window, center=True).mean()
# First and last positions where the centered tide window is fully populated.
half_window = window // 2
start_idx = half_window
end_idx = len(df) - half_window
# Plot every column (raw 1 s data, 60 s mean, tide) over the span where
# the centered tide window is fully populated.
df[start_idx:end_idx].plot(figsize=(15, 8),
title="SIO pier water column heights, 1-minute averages",
ylabel="Water column, cm",
xlabel=f"Date (UTC)\n{df.index[start_idx]} - {df.index[end_idx]}",).grid(axis='y')
# Calculate sea level (1 minute mean) with the tide estimate removed;
# the residual is where the tsunami signal stands out.
df['tide removed'] = df['rolling mean 60s'] - df['tide']
# Plot the de-tided sea level over the same span.
df['tide removed'][start_idx:end_idx].plot(figsize=(15, 8),
legend=True,
title="SIO pier sea level, tide removed, 1-minute averages",
ylabel="Sea level, cm",
xlabel=f"Date (UTC)\n{df.index[start_idx]} - {df.index[end_idx]}",).grid(axis='y')