Skip to content

Commit 89efee9

Browse files
authored
fix scraping for play by play and add deprecated team names (#242)
1 parent 2dbd9d7 commit 89efee9

File tree

2 files changed

+16
-6
lines changed

2 files changed

+16
-6
lines changed

basketball_reference_web_scraper/data.py

+9
Original file line number | Diff line number | Diff line change
@@ -44,12 +44,15 @@ class Team(Enum):
4444
WASHINGTON_WIZARDS = "WASHINGTON WIZARDS"
4545

4646
# DEPRECATED TEAMS
47+
KANSAS_CITY_KINGS = "KANSAS CITY KINGS"
4748
CHARLOTTE_BOBCATS = "CHARLOTTE BOBCATS"
4849
NEW_JERSEY_NETS = "NEW JERSEY NETS"
4950
NEW_ORLEANS_HORNETS = "NEW ORLEANS HORNETS"
5051
NEW_ORLEANS_OKLAHOMA_CITY_HORNETS = "NEW ORLEANS/OKLAHOMA CITY HORNETS"
5152
SEATTLE_SUPERSONICS = "SEATTLE SUPERSONICS"
53+
ST_LOUIS_HAWKS = "ST. LOUIS HAWKS"
5254
VANCOUVER_GRIZZLIES = "VANCOUVER GRIZZLIES"
55+
WASHINGTON_BULLETS = "WASHINGTON BULLETS"
5356

5457

5558
class OutputType(Enum):
@@ -144,13 +147,16 @@ class Division(Enum):
144147
'WAS': Team.WASHINGTON_WIZARDS,
145148

146149
# DEPRECATED TEAMS
150+
'KCK': Team.KANSAS_CITY_KINGS,
147151
'NJN': Team.NEW_JERSEY_NETS,
148152
'NOH': Team.NEW_ORLEANS_HORNETS,
149153
'NOK': Team.NEW_ORLEANS_OKLAHOMA_CITY_HORNETS,
150154
'CHA': Team.CHARLOTTE_BOBCATS,
151155
'CHH': Team.CHARLOTTE_HORNETS,
152156
'SEA': Team.SEATTLE_SUPERSONICS,
157+
'STL': Team.ST_LOUIS_HAWKS,
153158
'VAN': Team.VANCOUVER_GRIZZLIES,
159+
"WSB": Team.WASHINGTON_BULLETS,
154160
}
155161

156162
TEAM_TO_TEAM_ABBREVIATION = {v: k for k, v in TEAM_ABBREVIATIONS_TO_TEAM.items()}
@@ -190,11 +196,14 @@ class Division(Enum):
190196

191197
# DEPRECATED TEAMS
192198
"CHARLOTTE BOBCATS": Team.CHARLOTTE_BOBCATS,
199+
"KANSAS CITY KINGS": Team.KANSAS_CITY_KINGS,
193200
"NEW JERSEY NETS": Team.NEW_JERSEY_NETS,
194201
"NEW ORLEANS HORNETS": Team.NEW_ORLEANS_HORNETS,
195202
"NEW ORLEANS/OKLAHOMA CITY HORNETS": Team.NEW_ORLEANS_OKLAHOMA_CITY_HORNETS,
196203
"SEATTLE SUPERSONICS": Team.SEATTLE_SUPERSONICS,
204+
"ST. LOUIS HAWKS": Team.ST_LOUIS_HAWKS,
197205
"VANCOUVER GRIZZLIES": Team.VANCOUVER_GRIZZLIES,
206+
"WASHINGTON BULLETS": Team.WASHINGTON_BULLETS,
198207
}
199208

200209
POSITION_ABBREVIATIONS_TO_POSITION = {

basketball_reference_web_scraper/html.py

+7 -6
Original file line number | Diff line number | Diff line change
@@ -669,6 +669,7 @@ def playoff_box_scores_table_container_query(self):
669669
Thus, the comment containing the playoff table is identified and parsed and then fed into lxml to create the element
670670
tree that will eventually be rendered on the page.
671671
"""
672+
672673
@property
673674
def playoff_box_scores_table(self):
674675
matching_containers = self.html.xpath(self.playoff_box_scores_table_container_query)
@@ -770,8 +771,8 @@ def team_names_query(self):
770771
return \
771772
'//*[@id="content"]' \
772773
'//div[@class="scorebox"]' \
773-
'//div[@itemprop="performer"]' \
774-
'//a[@itemprop="name"]'
774+
'//strong' \
775+
'//a'
775776

776777
@property
777778
def play_by_play_table(self):
@@ -1065,17 +1066,17 @@ def league_abbreviation_query(self):
10651066
@property
10661067
def league_abbreviations(self):
10671068
abbreviations = self.html.xpath(self.league_abbreviation_query)
1068-
1069+
10691070
if len(abbreviations) > 0:
10701071
return abbreviations[0].text_content()
10711072

10721073
return None
1073-
1074+
10741075

10751076
class PlayerPageTotalsRow:
10761077
def __init__(self, html):
10771078
self.html = html
1078-
1079+
10791080
@property
10801081
def league_abbreviation(self):
10811082
league_abbreviation_cells = self.html.xpath('.//td[@data-stat="lg_id"]')
@@ -1111,7 +1112,7 @@ def __eq__(self, other):
11111112
class PlayerPage:
11121113
def __init__(self, html):
11131114
self.html = html
1114-
1115+
11151116
@property
11161117
def name(self):
11171118
name_headers = self.html.xpath('.//h1[@itemprop="name"]')

0 commit comments

Comments
 (0)