This project collects and analyzes top NCAA Division III outdoor track and field performances from 2011–2025. It scrapes official qualifying lists from TFRRS, cleans and converts performance marks, applies wind adjustments to sprint events, calculates yearly averages, and visualizes trends across events.
The data reveals a significant improvement in nearly every event in 2022, soon after the end of the COVID-19 pandemic and the NCAA's adoption of new Name, Image, and Likeness (NIL) policies. The pandemic allowed many athletes who missed a year of competition to receive a "COVID year," an additional year of eligibility, while NIL gives athletes the opportunity to be sponsored and receive compensation.
Although 2024 was technically the final year of competition for athletes using an additional "COVID year," performance still improved in nearly all events. I argue that this continued improvement is likely due to NIL policies. Since athletes in NCAA Division III cannot receive athletic scholarships, top athletes previously had little to no financial incentive to continue competing; now, however, they have the opportunity to monetize their continued athletic careers.
Additional research would be necessary to further support this claim, including research on NIL deals in NCAA Division III, a comparison with the improvements seen in Divisions I and II (where a financial incentive, scholarships, was already present), and an investigation of high school times to determine whether an overall improvement in track and field is occurring beyond the collegiate level.
All visualizations can be found in the event_plots folder.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from convert_time import wind_conversion as w_conv
import matplotlib.pyplot as plt
import os
from dict_events import dict_events_function as events_dict
year_urls = {
2025: "https://tfrrs.org/lists/5020",
2024: "https://www.tfrrs.org/lists/4517/2024_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2023: "https://m.tfrrs.org/lists/4043/2023_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2022: "https://tfrrs.org/lists/3714/2022_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2021: "https://soap.tfrrs.org/lists/3195/2021_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2020: "https://tfrrs.org/lists/2907/2020_NCAA_Div_III_Outdoor_Qualifying",
2019: "https://tfrrs.org/lists/2572/2019_NCAA_Div_III_Outdoor_Qualifying_FINAL",
2018: "https://tfrrs.org/lists/2283/2018_NCAA_Div_III_Outdoor_Qualifying_FINAL",
2017: "https://www.tfrrs.org/lists/1914/2017_NCAA_Div_III_Outdoor_Qualifying_FINAL",
2016: "https://mobile.tfrrs.org/lists/1684/2016_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2015: "https://m.tfrrs.org/lists/1443/2015_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2014: "https://www.tfrrs.org/lists/1232/2014_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2013: "https://tfrrs.org/lists/1033/2013_NCAA_Division_III_Outdoor_Qualifying_FINAL",
2012: "https://tfrrs.org/lists/842/2012_NCAA_Div_III_Outdoor_Qualifier_List",
2011: "https://tfrrs.org/lists/675/2011_NCAA_Division_III_Outdoor_POP_List_FINAL"
}
HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
}
def time_to_float(time_str):
    """Convert a 'M:SS.ss' time string to total seconds."""
    minutes, rest = time_str.split(":")
    minutes = int(minutes)
    seconds = float(rest)
    total_seconds = minutes * 60 + seconds
    return total_seconds

def float_to_time(seconds_float):
    """Convert seconds back to a zero-padded 'MM:SS.ss' string."""
    minutes = int(seconds_float // 60)
    seconds = seconds_float % 60
    return f"{minutes:02d}:{seconds:05.2f}"
def fetch_html(url, year):
    """Fetch page or load from local cache if available"""
    folder = "html_cache"
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, f"{year}.html")
    if os.path.exists(path):
        with open(path, "r") as f:
            html = f.read()
    else:
        response = requests.get(url, headers=HEADERS)
        if response.status_code != 200:
            raise Exception(f"Failed to fetch {year}")
        html = response.text
        with open(path, "w") as f:
            f.write(html)
        time.sleep(random.uniform(2, 5))
    return html
def parse_top_50_times(html, event_class, event):
    """Parse HTML and return top 50 legal marks with wind conversion"""
    soup = BeautifulSoup(html, "html.parser")
    times = []
    event_block = soup.find("div", class_=event_class)
    if not event_block:
        print(f"⚠️ Could not find event block: {event_class}")
        return times
    rows = event_block.select(".performance-list-body > div")
    needs_conversion = ("Men's 100m", "Women's 100m", "Men's 200m", "Women's 200m")
    for row in rows:
        try:
            columns = row.select("div")
            # Relay events ("x" in the name) list the mark in a different column
            if "x" in event:
                time_str = columns[2].text.strip()
            else:
                time_str = columns[4].text.strip()
            if ":" in time_str:
                mark = time_to_float(time_str)   # times of a minute or longer
            elif "m" in time_str:
                mark = float(time_str[:-1])      # field marks such as "15.23m"
            else:
                mark = float(time_str)           # short sprint times
            # Apply wind conversion to wind-affected sprints
            if event in needs_conversion:
                wind_str = columns[7].text.strip() if len(columns) > 7 else "0"
                wind_val = float(wind_str.replace("+", "").replace("w", "")) if wind_str else 0.0
                mark = w_conv(mark, wind_val)
            times.append(mark)
            if len(times) >= 50:
                break
        except (ValueError, IndexError):
            continue
    return times
yearly_averages = []
all_times = {}
events = events_dict()
dont_convert = ("Men's Decathlon", "Women's Heptathlon")
for year, url in year_urls.items():
    print(f"Processing {year}...")
    try:
        html = fetch_html(url, year)
        for event, class_id in events.items():
            times = parse_top_50_times(html, class_id, event)
            all_times[(year, event)] = times  # keep raw marks keyed by year and event
            if times:
                avg_time = sum(times) / len(times)
                yearly_averages.append({"year": year, "event": event, "avg_time": avg_time})
                # Human-readable average for the console; the CSV keeps the numeric value
                if avg_time > 60 and event not in dont_convert:
                    display = float_to_time(avg_time)
                else:
                    display = round(avg_time, 2)
                print(f"  {event}: {display}")
            else:
                print(f"No marks found for {event}")
    except Exception as e:
        print(f"Error processing {year}: {e}")
df = pd.DataFrame(yearly_averages).sort_values("year")
df.to_csv("d3_averages_2011-2025.csv", index=False)
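# The resulting CSV has three columns: year, event, and avg_time, where avg_time
# is in seconds for track events, metres for field events, and points for the
# multi-events. The plotting script below reads this file back in.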
import pandas as pd
import matplotlib.pyplot as plt
import os
df = pd.read_csv("d3_averages_2011-2025.csv")
# Create output folder for plots
output_folder = "event_plots"
os.makedirs(output_folder, exist_ok=True)
# Group events by type: timed events (lower is better), multi-events (points), and field events (metres)
multi=("Men's Decathlon", "Women's Heptathlon")
sprints=("Men's 100m","Women's 100m","Men's 200m","Women's 200m","Men's 400m","Women's 400m", "Men's 110m Hurdles", "Women's 100m Hurdles", "Men's 400m Hurdles","Women's 400m Hurdles","Women's 4x100m Relay","Men's 4x100m Relay")
jumps=("Men's High Jump", "Women's High Jump","Men's Long Jump","Women's Long Jump","Men's Triple Jump","Women's Triple Jump","Men's Pole Vault","Women's Pole Vault")
mid_distance=("Men's 800m","Women's 800m", "Men's 4x400m Relay", "Women's 4x400m Relay","Men's 1500m", "Women's 1500m","Men's 3000m Steeplechase","Women's 3000m Steeplechase")
distance=("Men's 5000m","Women's 5000m","Men's 10000m", "Women's 10000m")
# Loop through each unique event
for event in df['event'].unique():
    # Select data for this event
    event_data = df[df['event'] == event].sort_values('year')
    # Plot
    plt.figure(figsize=(8, 5))
    plt.plot(event_data['year'], event_data['avg_time'], marker='o', linestyle='-')
    plt.title(f"Progression of {event} From 2011 to 2025")
    plt.xlabel("Year")
    if any(event in t for t in (sprints, mid_distance, distance)):
        # Invert the y-axis for timed events so improvement trends upward
        plt.gca().invert_yaxis()
        plt.ylabel("Average Time (s)")
    elif event in multi:
        plt.ylabel("Average Score (pts)")
    else:
        plt.ylabel("Average Mark (m)")
    plt.grid(True)
    plt.tight_layout()
    # Save figure
    filename = f"{output_folder}/{event.replace(' ', '_')}.png"
    plt.savefig(filename)
    plt.close()
print(f"Saved plots for {len(df['event'].unique())} events in '{output_folder}' folder.")
def dict_events_function():
    """Map event names to the CSS class of each event's block on the TFRRS list pages."""
    li_events_class = []
    to_skip = (8, 10, 14, 15, 16, 17, 18, 20, 32, 34, 35, 36, 37, 38)
    men_only = (5, 39)
    women_only = (4, 40)
    for i in range(4, 41):
        if i not in to_skip and i not in men_only and i not in women_only:
            li_events_class.append(f"row gender_m standard_event_hnd_{i}")
            li_events_class.append(f"row gender_f standard_event_hnd_{i}")
        elif i in women_only:
            li_events_class.append(f"row gender_f standard_event_hnd_{i}")
        elif i in men_only:
            li_events_class.append(f"row gender_m standard_event_hnd_{i}")
    # Event names in the same order as the class strings generated above
    event_names = [
        "Women's 100m Hurdles", "Men's 110m Hurdles",
        "Men's 100m", "Women's 100m",
        "Men's 200m", "Women's 200m",
        "Men's 400m Hurdles", "Women's 400m Hurdles",
        "Men's 400m", "Women's 400m",
        "Men's 800m", "Women's 800m",
        "Men's 1500m", "Women's 1500m",
        "Men's 3000m Steeplechase", "Women's 3000m Steeplechase",
        "Men's 5000m", "Women's 5000m",
        "Men's 10000m", "Women's 10000m",
        "Men's High Jump", "Women's High Jump",
        "Men's Pole Vault", "Women's Pole Vault",
        "Men's Long Jump", "Women's Long Jump",
        "Men's Triple Jump", "Women's Triple Jump",
        "Men's Discus", "Women's Discus",
        "Men's Hammer", "Women's Hammer",
        "Men's Javelin", "Women's Javelin",
        "Men's Shot Put", "Women's Shot Put",
        "Men's 4x100m Relay", "Women's 4x100m Relay",
        "Men's 4x400m Relay", "Women's 4x400m Relay",
        "Men's Decathlon", "Women's Heptathlon",
    ]
    dict_events = dict(zip(event_names, li_events_class))
    return dict_events
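The convert_time module imported by the scraper (wind_conversion, aliased as w_conv) is not shown above. As a rough sketch of the interface the scraper expects, the placeholder below assumes a simple linear model of roughly 0.05 seconds of effect per 1 m/s of wind over 100m, a common rule of thumb; both the coefficient and the linear form are assumptions, and the actual conversion used in the project may differ.

def wind_conversion(mark, wind, seconds_per_mps=0.05):
    """Normalize a sprint time to still-air conditions (illustrative sketch only).

    Assumes a linear wind effect: a tailwind (positive reading) made the mark
    faster than still air, so time is added back; a headwind (negative reading)
    slowed it, so time is subtracted. The 0.05 s per m/s coefficient is an
    assumption, not necessarily the value used by this project.
    """
    return mark + wind * seconds_per_mps

Under this placeholder, a 10.60 100m run with a +2.0 m/s tailwind would be normalized to 10.70, while the same time into a 1.0 m/s headwind would become 10.55.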