The January 15, 2022 volcanic eruption occurred near Tonga in the South Pacific. CDIP's Scripps Pier pressure sensor recorded a clear tsunami signal arriving shortly before 16:00 UTC (08:00 PST) on January 15. Preliminary analysis shows a peak amplitude of over 20cm.
Pressure sensor data from CDIP's 073 - Scripps Pier La Jolla, CA can be manually downloaded at https://cdip.ucsd.edu/themes/?pb=1&d2=p70&u2=s:073:st:1:v:download_other In this example we're using multiple 1 hour disk farm (df) records, which can also be found individually at https://cdip.ucsd.edu/data01/PUBLIC_DATA/dsk_2022/073/jan/02/
# One-hour disk farm (df) records for 2022-01-15, hours 11:20 through 20:20 UTC.
# Name layout: 'df' + station (073) + stream (02) + YYYYMMDDHHMM.
df_files = [f'df0730220220115{hour:02d}20' for hour in range(11, 21)]
# Notebook created using jupyterlab=3.2.1, python=3.9.7, pandas=1.3.5, matplotlib=3.5.0
import pandas as pd
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy as np
def df_date(df_file):
    """
    Parse the timestamp embedded in a df file name.

    Characters 7 onward of the name encode the record start time as
    YYYYMMDDHHMM, e.g. 'df07302202201151120' -> 2022-01-15 11:20.

    Returns a pandas Timestamp.
    """
    encoded_time = df_file[7:20]
    return pd.to_datetime(encoded_time)
def get_url(df_file):
    """
    Build the CDIP public-data URL where a df file can be downloaded.

    Station, stream and record date are all decoded from the file name
    itself ('df' + station + stream + YYYYMMDDHHMM), e.g.
    'df07302202201151120' -> station 073, stream 02, 2022-01-15 11:20.
    """
    station = df_file[2:5]
    stream = df_file[5:7]
    record_start = pd.to_datetime(df_file[7:20])
    # Directory layout uses a lowercase three-letter month name, e.g. 'jan'.
    month_dir = record_start.month_name()[:3].lower()
    return (f'https://cdip.ucsd.edu/data01/PUBLIC_DATA/'
            f'dsk_{record_start.year}/{station}/{month_dir}/{stream}/{df_file}')
def parse_df(df_files):
    """
    Download and parse df files into a single flat list of samples.

    Each file is whitespace-delimited with 8 samples per line; the first
    25 lines of sensor metadata are skipped.
    Note: Header size may vary for different sensors.

    Parameters
    ----------
    df_files : list of str
        df file names understood by get_url().

    Returns
    -------
    list
        All samples concatenated in time order.
    """
    data = []
    for df_file in df_files:
        _df = read_csv(get_url(df_file),
                       skiprows=25,
                       names=['c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8'],
                       # Raw string: '\s+' in a plain literal is an invalid
                       # escape sequence (SyntaxWarning on modern Python).
                       delimiter=r'\s+')
        # Flatten row-major (C order) so samples stay in time order.
        # Much faster than the equivalent iterrows() loop over every cell.
        data.extend(_df.to_numpy().ravel())
    return data
# Fetch all records and assemble them into a single-column DataFrame.
water_column = parse_df(df_files)
start_date = df_date(df_files[0])
# Samples arrive at 1 Hz, so index them at a 1-second cadence starting
# from the first record's timestamp.
sample_times = pd.date_range(start=start_date, periods=len(water_column),
                             freq='s', name='Date')
df = pd.DataFrame({'Water column (cm)': water_column}, index=sample_times)
# Quality control: flag the sensor's -9999.9 fill value as missing, then
# patch gaps of at most 2 consecutive samples by linear interpolation.
df = df.replace(-9999.9, np.nan)
df = df.interpolate(limit=2)
# 1-minute centered rolling mean smooths out short-period wave noise.
df['rolling mean 60s'] = df['Water column (cm)'].rolling(60, center=True).mean()
# A 1-hour centered rolling mean serves as the tide estimate.
window = 3600
df['tide'] = df['Water column (cm)'].rolling(window, center=True).mean()
# First and last positions where the centered tide window is fully populated.
half_window = window // 2
start_idx = half_window
end_idx = len(df) - half_window
# Plot every column (raw 1 s data, 60 s mean, tide) over the span where
# the centered tide window is fully populated.
df[start_idx:end_idx].plot(figsize=(15, 8),
title="SIO pier water column heights, 1-minute averages",
ylabel="Water column, cm",
xlabel=f"Date (UTC)\n{df.index[start_idx]} - {df.index[end_idx]}",).grid(axis='y')
# Calculate sea level (1 minute mean) with the tide estimate removed;
# the residual is where the tsunami signal stands out.
df['tide removed'] = df['rolling mean 60s'] - df['tide']
# Plot the de-tided sea level over the same span.
df['tide removed'][start_idx:end_idx].plot(figsize=(15, 8),
legend=True,
title="SIO pier sea level, tide removed, 1-minute averages",
ylabel="Sea level, cm",
xlabel=f"Date (UTC)\n{df.index[start_idx]} - {df.index[end_idx]}",).grid(axis='y')