An Analysis of Meteorites: More Than Just Rocks in Space?

By Elizabeth Slesarev

Introduction


Don't confuse meteors with meteorites!

"Like meteorites, meteors are objects that enter Earth's atmosphere from space. But meteors—which are typically pieces of comet dust no larger than a grain of rice—burn up before reaching the ground. ... The term “meteorite” refers only to those bodies that survive the trip through the atmosphere and reach Earth's surface." -AMNH

In this tutorial, I will be going over some of Python's most useful libraries and tools for analyzing the coordinates of meteorite impact sites. We will also cover some ML methods for analyzing our chosen datasets so we can develop and test a hypothesis about any patterns or relationships we find.

We will visualize where meteorites land, and we will try to determine whether meteorites truly stem from supernovas.

All relevant files, images, and databases will be contained within this repo on GitHub.

Information on DBScan: https://towardsdatascience.com/dbscan-clustering-for-data-shapes-k-means-cant-handle-well-in-python-6be89af4e6ea

Information on KMeans: https://medium.datadriveninvestor.com/weighted-k-means-clustering-of-gps-coordinates-python-7c6270846163

Information on Linear Regression: https://machinelearningmastery.com/linear-regression-for-machine-learning/

Information on meteorites: https://www.amnh.org/explore/news-blogs/on-exhibit-posts/meteor-meteorite-asteroid

Information on databases, libraries and any other methods will be linked as introduced below.

Getting Started...

For this project, we will begin by importing the basics such as numpy, pandas, and matplotlib to name a few...
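A minimal import cell might look like this (shapely and statsmodels appear in later sections and are imported where they are used):

```python
# Core libraries for data handling and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Used later in the tutorial: interactive maps, HTTP requests, and clustering
import folium
import requests
from sklearn.cluster import DBSCAN
```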

We will be collecting data from two sources. The first is NASA's Meteorite Landings Database, which holds data on meteorite landings dating back to the year 1409. This database is a smaller version (ending at the year 2013) of The Meteoritical Society's more up-to-date one, which records landings up to the present.

The second source, which we will parse later in the tutorial, is a database containing all identified supernovas, from triple-digit years up to the present. Hosted by SNE Space, this database contains relevant information about when supernovas occurred.

Both databases will be imported as CSV files for ease of parsing.
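Loading the first database is a one-liner with pandas (the filename below is a placeholder for wherever you saved the CSV):

```python
# Placeholder filename; point it at your downloaded copy of
# NASA's Meteorite Landings CSV
df = pd.read_csv("Meteorite_Landings.csv")
print(df.shape)
df.head()
```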

For this dataset, we will be handling these columns:

name: the meteorite's name

id: a unique meteorite ID tag

recclass: the classification of the meteorite, based on its composition and structure

mass (g): the mass of the meteorite in grams

fall: whether the meteorite was observed falling ("Fell") or found later ("Found")

year: the year the meteorite fell or was found (stored as a DateTime)

reclat: latitude of the impact site

reclong: longitude of the impact site

Cleaning Up Our Data

Here we will parse the day, month, and time out of the year column, because we do not care to look at impacts in such fine detail. We will worry about years only for now (which will also be useful when we import the supernova database).

We will then limit the data to the years 1901 and onward, as the more recent records are more relevant to our analysis.
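A minimal sketch of both steps, assuming the raw year column is stored as a date string (as in the NASA CSV):

```python
# Parse the 'year' column (a full timestamp in the raw CSV) down to
# just the year, coercing unparseable values to NaT
df["year"] = pd.to_datetime(df["year"], errors="coerce").dt.year

# Keep only rows with a known year from 1901 onward
df = df.dropna(subset=["year"])
df = df[df["year"] >= 1901].copy()
df["year"] = df["year"].astype(int)
```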

Now that we have cleaned up our first dataset, let's do some analysis on it!

For this set, we will display an interactive map using folium. Let's create a new column that will hold a description of each meteorite.

We will define the description as the meteorite's name and its mass in grams.
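A one-line sketch of that column (the column names match the dataset; the exact formatting is a stylistic choice):

```python
# Human-readable description: the meteorite's name plus its mass in grams
df["description"] = df["name"] + ", " + df["mass (g)"].astype(str) + " g"
```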

Creating df_fell

Since "Fell" marks meteorites that were actually observed falling and impacting Earth, we will only map out the data with these coordinates in mind.

We will create a sub-dataframe using this specified information (we will continue to use the main dataframe, with its "Found" meteorites, for future analysis).
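The sub-dataframe is a simple boolean filter (dropping rows with missing coordinates at the same time):

```python
# Keep only observed falls that have usable coordinates
df_fell = df[df["fall"] == "Fell"].dropna(subset=["reclat", "reclong"]).copy()
print(len(df_fell), "observed falls")
```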

Mapping Out Coordinates

Before we map anything, all we have are two columns of raw geo-coordinates. We know from experience that to place a location on a map, we need the full latitude and longitude pair, as well as a name for that location to display to readers.

To solve the first problem, we will simply create a new column housing the lat and long coordinates combined into single points.

To solve the second problem, I will introduce an API to which we will pass these combined points to get back the names of the countries we are dealing with.

For this portion of the project, we are introduced to shapely, as well as the requests library. These will be important because shapely will help us map and deal with all things geo, whereas requests will allow us to pull the necessary data from the API, which maps the points we provide from our dataset to country names.
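As a rough sketch, we can store each pair as a shapely Point and query a reverse-geocoding endpoint with requests. The URL and response field below are placeholders for whichever API you use, not the actual service from the repo:

```python
import requests
from shapely.geometry import Point

# Combine the two coordinate columns into single shapely Points
# (shapely uses (x, y), i.e., (longitude, latitude) order)
df_fell["point"] = [Point(lon, lat) for lat, lon in
                    zip(df_fell["reclat"], df_fell["reclong"])]

def country_for(lat, lon):
    """Reverse-geocode one coordinate pair to a country name.
    The endpoint and response field are hypothetical placeholders."""
    resp = requests.get(
        "https://example.com/reverse",           # placeholder endpoint
        params={"lat": lat, "lon": lon},
        timeout=10,
    )
    if resp.ok:
        return resp.json().get("country")        # placeholder field name
    return None

df_fell["country"] = [country_for(lat, lon) for lat, lon in
                      zip(df_fell["reclat"], df_fell["reclong"])]
```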

Another possibility is Google's Geocoding API (the same one Google Maps uses); however, I found it much, much slower than the raw dataset that the GitHub repo provided.

Looking at the output after dropping coordinates that could not be mapped to a country (likely points in the ocean or in unnamed areas of land), we see that from 1901 to 2013, Sudan had the most meteorite landings.

In contrast, Senegal comes in with the least frequent landings.

Getting Ready To Visualize

For this part of the tutorial, we will begin plotting graphs to better show the distribution of meteorite landings across named countries.

We drop rows that contain unknown locations, as they will not be of much use to us.

Here we are plotting the landing frequency of each country to check whether our earlier observation that Sudan has the highest number of landings holds.
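A quick value_counts bar plot is enough here (the country column comes from the geocoding step above):

```python
import matplotlib.pyplot as plt

# Landings per country, highest first
counts = df_fell["country"].value_counts()

counts.head(20).plot(kind="bar", figsize=(12, 5))
plt.xlabel("Country")
plt.ylabel("Meteorite landings, 1901-2013")
plt.title("Meteorite landing frequency by country")
plt.tight_layout()
plt.show()
```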

Our observation holds. We can see Sudan topping the other countries with landing counts up around 1,470, far higher than the competition. This raises the question: why?

We can also observe the other high-ranking countries and note that Ethiopia, Cameroon, and Poland also have high landing counts. Hmmm...I wonder what all these places have in common?

Creating our Map

Now we will display these features on an interactive map using folium, as mentioned earlier, along with scikit-learn for the ML part.

This part will be a little different from the rest, as we will use DBScan, an unsupervised learning technique that clusters points based on two main parameters: an epsilon value and a minimum sample size.

Hypothesis: just from knowing the geographical regions of Poland, Cameroon, Sudan, etc. (the countries with the highest landing frequencies), we can expect the majority of our points to be classified into clusters around those countries, i.e., toward the middle of the map.

Why DBScan? First off, DBScan stands for "density-based spatial clustering of applications with noise," and the main reason we will use it is that it can recognize clusters of complex shapes. This is important because our coordinates are not grouped by landing frequency beforehand (if they were, we could safely expect more distinct groups of points clustered around the high-landing countries). We are mapping all coordinates, so we will have a diverse map of points stretching from the North Pole to the South Pole with no obvious pattern. We will use DBScan's epsilon and minimum-points parameters to establish density-based clusters. This will leave some noisy points (outliers), since not every point is close to a group, which is perfectly fine for our hypothesis.

  1. Epsilon: the maximum "distance" between two points for them to be considered part of the same group. We could determine this through a variety of methods, such as an elbow plot or average-distance calculations from the model, which would require a more in-depth appraisal of the data. However, since we are using an old database that has not been updated since 2013, we can reasonably tune this value by trial and error until the map looks right. For this set, we will use 0.1.
  2. Min sample size: the minimum number of points that a group of epsilon-connected points must contain to be considered a "cluster." We will determine this by trial and error for the same reasons as above. Since we are dealing with a slightly larger dataset, our value will be set to 30 points. (A minimal sketch of the clustering step follows this list.)
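Under those choices, the clustering itself is short. A minimal sketch with scikit-learn (note that eps here is measured in raw coordinate degrees, matching the 0.1 above):

```python
from sklearn.cluster import DBSCAN

# Cluster the raw latitude/longitude pairs; eps is in coordinate degrees here
coords = df_fell[["reclat", "reclong"]].to_numpy()
db = DBSCAN(eps=0.1, min_samples=30).fit(coords)

# One label per point: 0, 1, 2, ... for clusters, -1 for outliers
df_fell["cluster"] = db.labels_
n_clusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
print(n_clusters, "clusters found")
```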

DBScan is also favorable compared to KMeans, where the idea is to find cluster centers and re-evaluate them after each pass over the candidate points. KMeans would work for us if our data were cleaner and more favorably shaped into clusters beforehand. We will see after plotting these points that the clusters are not explicitly defined; zooming in on the map reveals more pronounced clusters in smaller batches, but drawing them all out would just make the map very messy and unreadable.

Note: DBScan assigns each point a cluster index; scikit-learn numbers the clusters starting from 0.

For example, if we have 4 defined clusters, points in the first cluster are labeled 0, points in the second cluster are labeled 1, and so on up to 3.

Also, if a point is considered an outlier, it is given the label -1 to distinguish it from the regular cluster groups.
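Putting it together with folium, a minimal version of the map might look like this (the colors and popup field are illustrative choices):

```python
import folium

m = folium.Map(location=[20, 10], zoom_start=2)

# One color per cluster label; black marks the -1 outliers
palette = ["red", "blue", "green", "purple", "orange", "cadetblue"]

for _, row in df_fell.iterrows():
    label = int(row["cluster"])
    color = "black" if label == -1 else palette[label % len(palette)]
    folium.CircleMarker(
        location=[row["reclat"], row["reclong"]],
        radius=3,
        color=color,
        fill=True,
        popup=str(row["description"]),  # click a point to see its metadata
    ).add_to(m)

m  # display inline in a notebook, or use m.save("map.html")
```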

As we can see, the data does not come pre-divided into nicely clustered groups, so an algorithm like KMeans would struggle to define dense populations like these.

We can also view 5 distinct clusters, and find that the majority of the points trend towards the center of the map.

We can also see that the largest landing frequencies occur along the coastlines of the African and European continents, supporting our earlier hypothesis that meteorite landings frequent this part of the world more than its edges (remember: Poland, Cameroon, and Sudan had the most frequent landings, and all of these countries sit within these clustered regions).

Note the black points, our outliers, and how they spread from Antarctica to Russia. They lie near clusters, but given our epsilon value they are still too far away to join one. Try zooming in to see just how far these outliers are from the clusters.

Also, try clicking on a data point! You will see the metadata for that meteorite. Isn't that cool?

Graphing Frequencies Over a Period of Time

Looking at all these occurrences, if we create a graph that displays meteorite landing frequency over time, we see a sudden spike around the 2000s.
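A sketch of that plot; the same approach is reused later for the supernova counts:

```python
import matplotlib.pyplot as plt

# Landings per year, in chronological order
per_year = df["year"].value_counts().sort_index()

per_year.plot(figsize=(12, 5))
plt.xlabel("Year")
plt.ylabel("Meteorite landings")
plt.title("Meteorite landing frequency, 1901-2013")
plt.show()
```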

This brought up the question: what other events were occurring at that time that could have caused this spike? I looked into what causes meteorites to form and found that they come from asteroids, which mainly come from supernovas.

Parsing our Second Database

Let's see if we can find a similar correlation with the frequency of supernovas.

Note: this is a record of all observed supernovas within and out of our solar system.

This dataset comes with a lot of dense scientific columns, so we will concern ourselves with only three.

Name: the names of the supernovas.

Disc. Date: the date the supernova was discovered.

Host Name: the name of the supernova's host galaxy.

We will similarly parse the date column into just years, as we do not care about the specific month and days.

We will then filter our data to the years 1901 through 2013 to match the meteorite range.

Note: ^(19|20)\d{2}$ is a regex pattern. We use it here because the supernova database records go back to triple-digit years, which would be annoying to parse out manually. The pattern matches four-digit years from 1900 to 2099 and discards anything else (the parentheses matter: without them, ^19|20\d{2}$ would match any value that merely starts with "19" or ends with a four-digit 20xx).
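A sketch of this step with pandas (sn stands for the supernova dataframe loaded from the SNE Space CSV; the exact date format is an assumption):

```python
# sn: the supernova dataframe loaded from the SNE Space CSV (assumed name)
# Pull a leading four-digit year (1900-2099) out of the discovery date
sn["year"] = sn["Disc. Date"].astype(str).str.extract(
    r"^((?:19|20)\d{2})", expand=False
)

# Drop triple-digit and otherwise malformed years, then match the meteorite range
sn = sn.dropna(subset=["year"])
sn["year"] = sn["year"].astype(int)
sn = sn[(sn["year"] >= 1901) & (sn["year"] <= 2013)].copy()
```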

For more info, look into regex and Python's built-in re library.

I recommend playing around with patterns using: https://regex101.com/

This will help you create a regex pattern and test it on values for whatever you need.

Let's plot a graph similar to the meteorite frequency plot to see if there is also a spike in supernovas around the same years.

As we can see from the plot, there is a spike around the 2000s, similar to the spike in our meteorite data.

Let's form a hypothesis: I think that meteorite landings are positively correlated with supernova occurrences.

Let's create a linear regression model to test this out!

Fitting a Regression Model

Let's create a new dataframe that will house the meteorite landing frequencies as well as the supernova frequencies.

We will then add a column for the years they correspond to.

Note: there were meteorite landings reported for every year from 1901 to 2013; however, I noticed that supernovas were not observed in some 20 years sprinkled throughout the timeline. This is interesting, as we will also see that meteorite frequencies were low during those years.
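A minimal sketch of the model with statsmodels, regressing meteorite counts on supernova counts (the exact model specification in the original analysis may differ, e.g., it may also include the year as a regressor):

```python
import pandas as pd
import statsmodels.api as sm

# Landing and supernova counts per year, aligned on the same year index;
# years with no observed supernovas are filled with 0
met_counts = df["year"].value_counts().sort_index()
sn_counts = sn["year"].value_counts().sort_index()

freq = pd.DataFrame({"Meteorites": met_counts, "Supernovas": sn_counts})
freq = freq.reindex(range(1901, 2014)).fillna(0)
freq["Year"] = freq.index

# Ordinary least squares: meteorite landings regressed on supernova counts
X = sm.add_constant(freq["Supernovas"])
model = sm.OLS(freq["Meteorites"], X).fit()
print(model.summary())
```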

All of the model's parameters are significantly different from zero, as their p-values fall below 0.05. The Supernovas coefficient comes closest to that threshold (expected, since supernovas occur at much higher frequencies than meteorite landings), but it is still significant.

We can observe an increase in meteorite landings as supernovas increase: roughly 0.019121 additional landings per additional supernova, on average. A small but positive correlation!

Therefore, our hypothesis holds: we can see a linear relationship between these two events.

Conclusion

Here we learned some interesting statistics about meteorites and just how many of them land on our planet. We saw how they tend to trend toward the center of the world (so keep that in mind if you plan to live there and fear getting hit by these big rocks!), and we saw a correlation between something so out-of-this-world (supernovas) and a phenomenon that occurs on our planet every year (meteorite landings).

We used unsupervised and supervised learning methods (DBScan and linear regression, respectively) to analyze these events and look more closely at the relationships that can be spotted between landing trends and their possible origins.

Hopefully, this tutorial has introduced you to some of Python's strengths and shown the many ways you can draw analysis from data. From regex to reverse-engineering geo-coordinates using shapely and APIs, we found that meteorites tend to land on the coasts of Africa and Europe as well as the US. We also found a correlation consistent with the idea that supernovas lead to the formation of some of these meteorites.

Thanks for reading!