from lec_utils import *

# Download the Happening @ Michigan landing page.
# The timeout keeps this call from hanging indefinitely if the server never
# responds, and raise_for_status() turns an HTTP error response (4xx/5xx) into
# an exception instead of letting an error page be parsed as the event listing.
res = requests.get('https://events.umich.edu', timeout=10)
res.raise_for_status()
res

<Response [200]>

# Name the parser explicitly. Without one, BeautifulSoup picks whichever parser
# happens to be installed and warns about it, so the resulting tree can differ
# from machine to machine. 'html.parser' ships with Python itself.
soup = BeautifulSoup(res.text, 'html.parser')

# Collect every element whose class list includes 'col-xs-12'. The event cards
# carry this class (see divs[0] below), but not every match is necessarily an
# event card, so later parsing has to tolerate elements with a different shape.
divs = soup.find_all(class_='col-xs-12')

# How many candidate elements matched.
len(divs)

9

# Display the first match to see the markup of a single event card.
divs[0]

<div class="col-xs-12 col-sm-4 col-md-4 col-lg-2 flex no-pad">
<div class="event-listing-grid event-single">
<time class="time-banner" datetime="2025-05-03 11:00"><i class="fa fa-clock-o"></i> May 03, 2025 11:00am</time>
<div class="list-image">
<a alt="Justin Roberts" href="/event/133745" style="background:url(https://events.umich.edu/media/cache/event_list_2x/media/attachments/2025/03/event_133745_original-1.jpg) center center no-repeat; background-size:cover; position:absolute; width:100%;height:100%; top:0px;left:0px;" title="Justin Roberts">
</a>
</div>
<div class="event-info">
<div class="event-title"><h3>
<a href="/event/133745" title="Justin Roberts &amp; The Not Ready for Naptime Players">
    Justin Roberts &amp; The Not Ready for Naptime...
    </a></h3>
<h4>Presented by The Ark.</h4>
</div>
<ul class="event-details">
<li class="item">
<a href="/list?filter=locations:1" title="GA - The Ark"><i class="fa fa-location-arrow fa-fw"></i><span> GA - The Ark</span></a>
</li>
<li class="item"><a href="/group/1053" title="Michigan Union Ticket Office (MUTO)"><i class="fa fa-group fa-fw"></i><span>
        Michigan Union Ticket...
    </span></a></li>
<li class="item"><a href="/list?filter=alltypes:15"><i class="fa fa-list fa-fw"></i><span> Performance </span></a></li>
<li class="item"><a href="https://mutotix.umich.edu/5580/5581">
<i class="fa fa-ticket fa-fw"></i>
<span>Purchase tickets here!</span>
</a></li>
<li class="item"><a href="https://theark.org/support-the-ark/">
<i class="fa fa-link fa-fw"></i>
<span>Support The Ark!</span>
</a></li>
<li class="item"><a href="https://www.justinrobertsmusic.com/">
<i class="fa fa-link fa-fw"></i>
<span>Justin Roberts</span>
</a></li>
<li class="item"><a href="https://www.youtube.com/watch?v=7K09PrjXwbU">
<i class="fa fa-youtube fa-fw"></i>
<span>https://www.youtube.com/watch?v=7K09PrjXwbU</span>
</a></li>
</ul>
<!--
    <p>
    “Among the best craftsmen of sweet and silly kid tunes out there, making irresistible music out of small, well-observed moments from the...
    (
        2025-05-03 11:00am
    )
    </p>
-->
</div>
</div>
</div>

# The visible link text is truncated with '...', so read the full event name
# from the link's `title` attribute instead.
divs[0].find('div', class_='event-title').find('a').get('title')

'Justin Roberts & The Not Ready for Naptime Players'

# The <time> tag's `datetime` attribute holds the timestamp in a plain
# 'YYYY-MM-DD HH:MM' form, which is easier to parse than the display text.
divs[0].find('time').get('datetime')

'2025-05-03 11:00'

# In this card, the first <a> inside the first <ul> is the location link, and
# its `title` attribute holds the location name.
divs[0].find('ul').find('a').get('title')

'GA - The Ark'

def process_event(div):
    """Pull the title, time, and location out of one event card.

    Parameters
    ----------
    div : element exposing ``find`` and ``get`` (e.g. one item of ``divs``)
        The container for a single event.

    Returns
    -------
    dict
        ``{'title': ..., 'time': ..., 'location': ...}``, where ``time`` is
        the result of ``pd.to_datetime`` on the ``<time>`` tag's ``datetime``
        attribute.

    Raises
    ------
    AttributeError
        If an expected element is missing, because a failed ``find`` yields
        ``None`` and the next call in the chain is made on it.
    """
    # Full event name: the `title` attribute of the link in the title block.
    title_link = div.find('div', class_='event-title').find('a')
    title = title_link.get('title')

    # Location name: the `title` attribute of the first link in the first list.
    venue_link = div.find('ul').find('a')
    venue = venue_link.get('title')

    # Parse the timestamp string so the column supports datetime operations.
    stamp = div.find('time').get('datetime')
    return dict(title=title, time=pd.to_datetime(stamp), location=venue)

# NOTE: in this run `divs` has only 9 elements (see len(divs) above), so
# index 12 is out of range and this call raises IndexError before
# process_event ever runs.
process_event(divs[12])

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[11], line 1
----> 1 process_event(divs[12])

IndexError: list index out of range

# Parse every candidate element, keeping only the ones that parse cleanly.
# Elements that are not event cards make process_event raise; the message is
# printed and the element is skipped instead of aborting the whole run.
row_list = []
for div in divs:
    try:
        row = process_event(div)
    except Exception as err:
        print(err)
    else:
        row_list.append(row)

'NoneType' object has no attribute 'find'

# One row per successfully parsed event; the dict keys become the columns.
events = pd.DataFrame(data=row_list)
events.head()

# Which events are in-person today?
# An event counts as in-person when its location is neither 'Virtual' nor blank.
not_in_person = events['location'].isin(['Virtual', ''])
events[~not_in_person]

	title	time	location
0	Justin Roberts & The Not Ready for Naptime Pla...	2025-05-03 11:00:00	GA - The Ark
1	Read and Look \| The Water Princess	2025-05-03 11:15:00	Kelsey Museum of Archaeology
2	Women's Tennis vs Arizona State	2025-05-03 13:00:00	Varsity Tennis Bldg
3	Screening: 2025 IP Exhibition	2025-05-03 14:00:00	Art & Architecture Building, 2000 Bonistee...
4	Men's Lacrosse vs Big Ten Championship	2025-05-03 19:00:00	U-M Lacrosse Stadium

	title	time	location
0	Justin Roberts & The Not Ready for Naptime Pla...	2025-05-03 11:00:00	GA - The Ark
1	Read and Look \| The Water Princess	2025-05-03 11:15:00	Kelsey Museum of Archaeology
2	Women's Tennis vs Arizona State	2025-05-03 13:00:00	Varsity Tennis Bldg
...	...	...	...
5	Men's Lacrosse vs Big Ten Championship	2025-05-03 20:00:00	U-M Lacrosse Stadium
6	Men's Lacrosse vs Big Ten Championship	2025-05-03 20:00:00	U-M Lacrosse Stadium
7	The Wildwoods	2025-05-03 20:00:00	ARK Reserved

Discussion Slides: Visualization, Imputation, and Web Scraping

Agenda 📆¶

Example: Scraping the Happening @ Michigan page¶

Example: Scraping the Happening @ Michigan page¶

Identifying `<div>`s¶

Parsing a single event, and then every event¶

Discussion Slides: Visualization, Imputation, and Web Scraping

Agenda 📆¶

Example: Scraping the Happening @ Michigan page¶

Example: Scraping the Happening @ Michigan page¶

Identifying `<div>`s¶

Parsing a single event, and then every event¶

Identifying `<div>`s¶