Obspy
Contents
Obspy#
Author: Fu Yin
Update: August 5, 2022
Reading: 30 min
Event Data Process#
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 5 15:38:21 2021
sac 文件要求头文件内标记了发震时刻与震中距方位角信息
@author: yf
"""
import os
import glob
import obspy
import subprocess
import numpy as np
from obspy import Stream
from obspy.taup import TauPyModel
from obspy.taup.taup_create import build_taup_model
from math import sin,cos,pi
from obspy.core import UTCDateTime
from MCMTpy.utils.asdf_function import get_StationXML,Add_waveforms_head_info
from MCMTpy.utils.asdf_function import Add_waveforms,Add_stationxml,get_QuakeML,Add_quakeml
######################
def get_taup_tp_ts(model,depth,distance,degree=None):
if degree==False:
distance = distance/111.19
time_p = model.get_travel_times(source_depth_in_km=depth,
distance_in_degree=distance,
phase_list=["p", "P"])
time_s = model.get_travel_times(source_depth_in_km=depth,
distance_in_degree=distance,
phase_list=["s", "S"])
ray_p = time_p[0].ray_param
tp = time_p[0].time
angle_p = time_p[0].incident_angle
ray_s = time_s[0].ray_param
ts = time_s[0].time
angle_s = time_s[0].incident_angle
return ray_p,tp,angle_p,ray_s,ts,angle_s
def rotate(data_raw,chn=['RTZ','ENZ']):
'''
Parameters
----------
data : TYPE
DESCRIPTION.
chn : TYPE, optional
DESCRIPTION. The default is ['RTZ','ENZ'].
Raises
------
ValueError
DESCRIPTION.
Returns
-------
data : TYPE
DESCRIPTION.
'''
data = data_raw.copy()
if ('R' in chn[0]) and ('T' in chn[0]) and ('Z' in chn[0]) and ('E' in chn[1]) and ('N' in chn[1]) and ('Z' in chn[1]):
r_in_index = chn[0].index('R')
t_in_index = chn[0].index('T')
z_in_index = chn[0].index('Z')
e_out_index = chn[1].index('E')
n_out_index = chn[1].index('N')
z_out_index = chn[1].index('Z')
for i in range(0,len(data),3):
BAZ = data[i].stats.back_azimuth/180*pi #float( baz ) # labels[9] 表示 asdf 文件中 baz 的值 Raw_data[chn].stats.asdf['labels'][9]
R = data[i+r_in_index].data
T = data[i+t_in_index].data
Z = data[i+z_in_index].data
E = -cos(BAZ)*T - sin(BAZ)*R
N = sin(BAZ)*T - cos(BAZ)*R
data[i+e_out_index].stats.channel='E'
data[i+e_out_index].data = E
data[i+n_out_index].stats.channel='N'
data[i+n_out_index].data = N
data[i+z_out_index].stats.channel='Z'
data[i+z_out_index].data = Z
elif ('E' in chn[0]) and ('N' in chn[0]) and ('Z' in chn[0]) and ('R' in chn[1]) and ('T' in chn[1]) and ('Z' in chn[1]):
e_in_index = chn[0].index('E')
n_in_index = chn[0].index('N')
z_in_index = chn[0].index('Z')
r_out_index = chn[1].index('R')
t_out_index = chn[1].index('T')
z_out_index = chn[1].index('Z')
for i in range(0,len(data),3):
BAZ = data[i].stats.back_azimuth/180*pi #float( baz ) # labels[9] 表示 asdf 文件中 baz 的值 Raw_data[chn].stats.asdf['labels'][9]
E = data[i+e_in_index].data
N = data[i+n_in_index].data
Z = data[i+z_in_index].data
R = -cos(BAZ)*N - sin(BAZ)*E
T = sin(BAZ)*N - cos(BAZ)*E
data[i+r_out_index].stats.channel='R'
data[i+r_out_index].data = R
data[i+t_out_index].stats.channel='T'
data[i+t_out_index].data = T
data[i+z_out_index].stats.channel='Z'
data[i+z_out_index].data = Z
else:
raise ValueError('chn format error! abort!')
return data
######################
#######################################################
################PARAMETER SECTION######################
#######################################################
#%% 1. data/file paths
RAWDATA = "./YN.202105212148_MCMTpy_hpick" # dir where SAC files are located
allfiles_path = os.path.join(RAWDATA,'*sac') # make sure all sac files can be found through this format
# 2. output data/file paths
Output_path = "./YN.202105212148_MCMTpy_hpick_m_2" # 输出文件存放的位置
source_name = "source_enz" # asdf 文件的 source_tag
ASDF_filename = "YN.202105212148" # asdf 文件的名称
# 3. 创建自己的速度模型,只有第一次需要 build_taup_model 计算走时表
taup_pick = 'no' # select 'yes' to do Ray tracing with taup
v_model_path = '../v_model/v_model.nd'
# build_taup_model(v_model_path, output_folder='../v_model', verbose=True)
model_path = "../v_model/v_model.npz"
# 4. useful parameters
rotate_mode = False # 是否要旋转分量
chn = ['RTZ','ENZ'] # rotate from chn[0] to chn[1]
freqmin = 0.005 # pre filtering frequency bandwidth (hz)
freqmax = 2 # note this cannot exceed Nquist frequency (hz)
samp_freq = 5 # targeted sampling rate (hz)
p_n0 = 100 # P波到达前的采样点个数
npts = 2048 # 数据点长度(如果超过endtime,则按照endtime处理),trim时可能存在一个采样点的误差
# 5. event infomation (需要手动输入)
UTCtime = UTCDateTime('2021-05-21'+'T'+'21:48:35.36'+'+08') # 地震目录的时间都是北京时间,时区为 UTC+8,需要转换成 UTC 时间统一标准
nptime = UTCtime.timestamp
year = UTCtime.year
julday = UTCtime.julday
hour = UTCtime.hour
minute = UTCtime.minute
second = UTCtime.second
microsecond = 0 # UTCtime.microsecond, 请始终保持 0
evla = 25.682
evlo = 99.881
evdp = 17
mag = 6.6
##################################################
# we expect no parameters need to be changed below
# assemble parameters for data pre-processing
prepro_para = {'RAWDATA':RAWDATA, 'allfiles_path':allfiles_path,\
'Output_path':Output_path, 'source_name':source_name, 'ASDF_filename':ASDF_filename,\
'taup_pick':taup_pick, 'v_model_path':v_model_path, 'model_path':model_path,\
'freqmin':freqmin, 'freqmax':freqmax, 'samp_freq':samp_freq, 'p_n0':p_n0, 'npts':npts,\
'UTCtime':UTCtime, 'evla':evla, 'evlo':evlo, 'evdp':evdp, 'mag':mag}
metadata = os.path.join(Output_path,'process_info.txt')
# output parameter info
if not os.path.isdir(Output_path):os.mkdir(Output_path)
fout = open(metadata,'w')
fout.write(str(prepro_para))
fout.close()
##########################################################
#################PROCESSING SECTION#######################
##########################################################
#%% 0. sac 写入发震事件信息 (改成obspy更好!)
os.putenv("SAC_DISPLAY_COPYRIGHT", '0') # 设置环境变量
cmd_sac = subprocess.Popen(['/Users/yf/1.Software/1.SAC/sac/bin/sac'], stdin=subprocess.PIPE, cwd=RAWDATA) # 利用sac程序去除仪器响应 /Users/yf/1.Software/1.SAC/sac/bin/sac
s = ''
for sacfile in sorted(glob.glob(allfiles_path)):
sac_name = sacfile.split('/')[-1]
s += "r %s \n" % sac_name
s += "ch o gmt %s %s %s %s %s %s \n" % (year,julday,hour,minute,second,microsecond) # 使用 sac 把发震时间写入头部变量 o 中
s += "ch allt (0 - &1,o&) iztype IO \n" # 将 发震时间 设置成 参考时间
s += "ch evla %f evlo %f evdp %f mag %f\n" % (evla,evlo,evdp,mag) # 写入事件位置
s += "wh\n"
s += "q \n"
stdout, stderr = cmd_sac.communicate(s.encode()) # Popen.communicate 命令会自带 Popen.wait(), 但保险起见还是需要自己添加 wait()命令
cmd_sac.wait()
#%% 1. obspy 读取原始数据 return: data
allfiles = sorted( glob.glob(allfiles_path) )
data_raw = Stream()
for i in range(0,len(allfiles),1):
try:
tr = obspy.read(allfiles[i])
# print(tr[0].stats.sac.dist,tr[0].stats.sac.evdp)
data_raw += tr
except Exception:
print(allfiles[i],': no such file or obspy read error');continue
data = data_raw.copy()
#%% 2. 读取台站的经纬度信息output "NET_STA" used in samole_dc.json and plot_dc.json
# 2.1 读取台站信息,并按照震中距排序
unique_stations = np.unique([tr.stats.station for tr in data]) # 所有台站的名称 np.unique 去除重复元素
sta_num = unique_stations.shape[0]
NET_STA = np.empty(shape=(sta_num,6),dtype='object') # 创建空数组,类型是 object 可以包含字符和数字
for i in range(0,sta_num,1):
tr = data.select(station=unique_stations[i])[0] # Just taking the first Trace if multiple
NET_STA[i,0] = tr.stats.network
NET_STA[i,1] = tr.stats.station
NET_STA[i,2] = tr.stats.sac.stla
NET_STA[i,3] = tr.stats.sac.stlo
NET_STA[i,4] = 0 # 台站的深度始终保持 0
NET_STA[i,5] = tr.stats.sac.dist
NET_STA = NET_STA[ NET_STA[:,5].argsort() ] # 按照 dist 那一列排序
# 2.2 将 NET_STA 写入文件
fp = open('./NET_STA.txt', 'w') # "NET_STA" used in samole_dc.json and plot_dc.json
fp.write('[')
for i in range(0,sta_num,1):
fp.write( str(NET_STA[i,0:5].tolist()) )
fp.write(',')
if i != (sta_num-1):
fp.write('\n')
fp.write(']')
fp.close()
fp = open('./NET_STA_dist.txt', 'w') # 包含震中距信息
fp.write('[')
for i in range(0,sta_num,1):
fp.write( str(NET_STA[i,:].tolist()) )
fp.write(',')
if i != (sta_num-1):
fp.write('\n')
fp.write(']')
fp.close()
#%% 3. taup射线追踪拾取到时
if taup_pick=='yes':
model = TauPyModel(model=model_path) # model_path 还可以是 "iasp91" "prem"
for i in range(0,len(data),1):
depth = data[i].stats.sac['evdp']
distance = data[i].stats.sac['dist']
ray_p,tp,angle_p,ray_s,ts,angle_s = get_taup_tp_ts(model,depth,distance,degree=False)
data[i].stats.sac["t1"]=tp # 与 pyfk 标记方法一致,多了 ray_p ray_s
data[i].stats.sac["user1"]=angle_p
data[i].stats.sac["user3"]=ray_p
data[i].stats.sac["t2"]=ts
data[i].stats.sac["user2"]=angle_s
data[i].stats.sac["user4"]=ray_s
# 3.1. 利用 taup 画 最后一个事件的 raypath
# if taup_pick=='yes':
# path_p = model.get_ray_paths(source_depth_in_km=depth,
# distance_in_degree=distance/111.19,
# phase_list=["p", "P","s", "S"])
# path_p[0].path.dtype
# ax = path_p.plot_rays(plot_type="cartesian",legend=True) # 'spherical' cartesian
#%% 4. 去均值去趋势
data.detrend(type='demean')
data.detrend(type='simple')
data.taper(max_percentage=0.05)
#%% 5. 滤波然后降采样 and 裁剪数据
# 5.1 滤波降采样
data.filter('bandpass', freqmin=freqmin, freqmax=freqmax, corners=4, zerophase=True) # bandpass
data.resample(samp_freq,window='hanning',no_filter=True, strict_length=False) # resample
# 5.2 裁剪每一个分量数据
for i in range(0,len(data),1):
b = data[i].stats.sac['b']
t1 = data[i].stats.sac['t1']
t_start = data[i].stats.starttime + (t1-b) - p_n0/samp_freq
# 判断数据长度 与 endtime 的关系
t_end_npts = t_start+npts/samp_freq
t_endtime = data[i].stats.endtime
if t_end_npts > t_endtime:
t_end = t_endtime
else:
t_end = t_end_npts
data[i].trim(t_start, t_end)
# 5.3 把三分量数据裁成一样长
for i in range(0,round(len(data)/3)):
t_start_1 = data[3*i].stats.starttime
t_start_2 = data[3*i+1].stats.starttime
t_start_3 = data[3*i+2].stats.starttime
t_end_1 = data[3*i].stats.endtime
t_end_2 = data[3*i+1].stats.endtime
t_end_3 = data[3*i+2].stats.endtime
t_start = max(t_start_1,t_start_2,t_start_3)
t_end = min(t_end_1,t_end_2,t_end_3)
data[3*i:3*i+3].trim(t_start, t_end)
# 5.4 转换波形数据的范围,去除仪器响应的单位是 velocity(nm/s), fk用的单位是 velocity(cm/s)
for i in range(0,len(data),1):
data[i].data = 1e6*data[i].data
#%% 6. 旋转到 ENZ 方向
if rotate_mode == True:
for i in range(0,len(data)):
data[i].stats.back_azimuth = data[i].stats.sac.baz
data_enz = rotate(data=data,chn=chn) # 反方位角需要有定义 data[i].stats.back_azimuth
else:
data_enz = data
#%% 7. 输出 ASDF 与 sac 文件
source_lat = data_enz[0].stats.sac.evla
source_lon = data_enz[0].stats.sac.evlo
source_depth = data_enz[0].stats.sac.evdp
source_time = data_enz[0].stats.starttime + data_enz[0].stats.sac.b # UTCDateTime()
Output_file_path = os.path.join(Output_path, ASDF_filename+'.h5')
catalog_create = get_QuakeML(source_name, source_lat, source_lon, source_depth,source_time)
Add_quakeml(Output_file_path,catalog_create)
Station_num = round(len(data_enz)/3)
for i in range(0,Station_num,1): # 循环所有台站,写 ASDF 文件
net_sta_name = data_enz[3*i].stats.network+'_'+data[3*i].stats.station
station_network = data_enz[3*i].stats.network
station_name = data_enz[3*i].stats.station
station_lat = data_enz[3*i].stats.sac.stla
station_lon = data_enz[3*i].stats.sac.stlo
station_depth = 0
station_distance = data_enz[3*i].stats.sac.dist
station_az = data_enz[3*i].stats.sac.az
station_baz = data[3*i].stats.sac.baz
tp = data_enz[3*i].stats.sac.t1
ts = data_enz[3*i].stats.sac.t2
stream = data_enz[3*i:3*i+3].copy()
starttime = data_enz[3*i].stats.starttime
time_before_first_arrival = p_n0/samp_freq
inv=get_StationXML(station_network,
station_name,
station_lat,
station_lon,
station_depth,
'enz') # 生成台站元
# Add_waveforms_head_info([stream],
# station_network,
# station_name,
# 'enz',
# starttime) # 修改头文件中 channel 的信息顺序,变成 E/N/Z 顺序 (该版本不需要)
print("now syn sac "+station_network+'_'+station_name+':\n')
print('lat:',station_lat,' lon:',station_lon)
print(stream)
print('\n\n')
Add_waveforms([stream],
catalog_create,
Output_file_path,
source_name,
tp,
ts,
station_distance,
station_az,
station_baz) # 添加波形
Add_stationxml(inv,
Output_file_path) # 添加台站元
# stream.plot()
for j in range(0,3,1): # 输出 sac 文件
CHN = stream[j].stats.channel
sac_filename = os.path.join(Output_path,source_name+'.'+station_network+'.'+station_name+'.'+CHN+'.sac')
stream[j].write(sac_filename, format='SAC')