@@ -1,24 +1,30 @@
 from datetime import datetime
+from io import StringIO

-from bs4 import BeautifulSoup as bs
-from loguru import logger
 import numpy as np
 import pandas as pd
 import requests
+from bs4 import BeautifulSoup as bs
+from loguru import logger
 from requests.adapters import HTTPAdapter
 from tqdm import tqdm
 from urllib3.util import Retry
 from user_agent import generate_user_agent

-
 # A public league for current week and player IDs
-PUBLIC_LEAGUE = 39452
+PUBLIC_LEAGUE = 16
+PUBLIC_LEAGUE_IDP = 762
+SEARCH_PLAYER_GROUPS = ["QB", "WR", "RB", "TE", "K", "DEF"]
+SEARCH_PLAYER_GROUPS_IDP = ["QB", "WR", "RB", "TE", "K", "D", "DB", "DL", "LB"]


 def create_session():
     """Create requests session with retries and random user-agent"""
     s = requests.Session()
     s.headers = {
+        "Accept": "text/html",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "en-US",
         "Content-Type": "application/x-www-form-urlencoded;charset=utf-8",
         "User-Agent": generate_user_agent(),
     }
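Note: the docstring mentions retries, but the retry wiring itself falls outside this hunk's context. A minimal sketch of what a Retry/HTTPAdapter setup under these imports typically looks like; the retry count, backoff factor, and status codes here are illustrative assumptions, not the commit's actual values:

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util import Retry

    s = requests.Session()
    # Retry transient failures with exponential backoff (values assumed)
    retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[429, 500, 502, 503])
    s.mount("https://", HTTPAdapter(max_retries=retries))
    s.mount("http://", HTTPAdapter(max_retries=retries))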
@@ -30,18 +36,19 @@ def create_session():
     return s


-def scrape(league):
+def scrape(league, is_IDP: bool = False):
     """Scrape data

     :param league: league ID
+    :param is_IDP: (bool) is this an individual defensive player (IDP) league?
     """

     # Start timer
     startTime = datetime.now()

     # Scrape player IDs and teams from a public league
     data = set()
-    groups = ["QB", "WR", "RB", "TE", "K", "DEF"]
+    groups = SEARCH_PLAYER_GROUPS_IDP if is_IDP else SEARCH_PLAYER_GROUPS
     s = create_session()
     for group in groups:
         logger.info("Scraping all {}...".format(group))
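Note: with the new flag, callers choose between the offensive and IDP player groups. A usage sketch, reusing the old public league ID from this diff as a placeholder:

    scrape(39452)               # offensive groups: QB, WR, RB, TE, K, DEF
    scrape(39452, is_IDP=True)  # IDP groups swap DEF for D and add DB, DL, LB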
@@ -51,7 +58,7 @@ def scrape(league):
             s.headers["User-Agent"] = generate_user_agent()
             r = s.get(
                 "https://football.fantasysports.yahoo.com/f1/{}/players".format(
-                    PUBLIC_LEAGUE
+                    PUBLIC_LEAGUE_IDP if is_IDP else PUBLIC_LEAGUE
                 ),
                 params=dict(
                     count=i * 25,
@@ -64,12 +71,14 @@ def scrape(league):
             i += 1
             soup = bs(r.text, "lxml")
             table = soup.select_one("#players-table table")
+            if not table:
+                break
             rows = table.select("tbody tr")
             if not rows:
                 break
             for row in rows:
                 td = row.select("td")[1]
-                ID = td.select_one("span .player-status a")["data-ys-playerid"]
+                ID = td.select_one(".player-status a")["data-ys-playerid"]
                 ID = int(ID)
                 team = td.select_one(".ysf-player-name span").text
                 team = team.split()[0]
@@ -115,7 +124,7 @@ def get_projections(row):
     row["% Owned"] = playerinfo.select_one("dd.owned").text.split()[0]

     # Weekly projections
-    df2 = pd.read_html(html)[0]
+    df2 = pd.read_html(StringIO(html))[0]
     for _, row2 in df2.iterrows():
         week = "Week {}".format(row2["Week"])
         points = row2["Fan Pts"]
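Note: the StringIO wrapper matches pandas' current contract — passing literal HTML strings to pd.read_html() is deprecated since pandas 2.1 in favor of file-like objects. A minimal self-contained sketch, with made-up table markup:

    from io import StringIO

    import pandas as pd

    # Toy markup standing in for the scraped projections table
    html = "<table><tr><th>Week</th><th>Fan Pts</th></tr><tr><td>1</td><td>17.5</td></tr></table>"
    df2 = pd.read_html(StringIO(html))[0]  # same DataFrame, no FutureWarning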
@@ -149,7 +158,15 @@ def get_projections(row):
     columns = ["Week {}".format(i) for i in range(current_week(), 18)]
     df["Remaining"] = df[columns].sum(axis=1)
     available = df.loc[df["Owner ID"].isnull()]
-    means = available.groupby(["Position"])["Remaining"].nlargest(3).mean(level=0)
+    means = (
+        available.groupby(["Position"])["Remaining"].nlargest(3).groupby(level=0).mean()
+    )
+    for positions in means.index:
+        if "," in positions:
+            for position in positions.split(","):
+                position = position.strip()
+                if position not in means:
+                    means[position] = means[positions]
     df["VOR"] = df.apply(
         lambda row: row["Remaining"]
         - max(means[n.strip()] for n in row["Position"].split(",")),
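Note: the means rewrite tracks a pandas API removal — Series.mean(level=0) was deprecated and then removed in pandas 2.0, and the equivalent spelling is .groupby(level=0).mean(). The loop that follows copies the mean for a combined position like "WR,TE" onto each bare position so the max() lookup in the VOR lambda cannot raise a KeyError. A toy example with invented numbers, showing the per-position average of the top three remaining projections:

    import pandas as pd

    available = pd.DataFrame(
        {"Position": ["RB", "RB", "RB", "RB", "WR", "WR"],
         "Remaining": [120.0, 110.0, 100.0, 90.0, 80.0, 70.0]}
    )
    # nlargest(3) keeps the top 3 per group; grouping on level 0 of the
    # resulting MultiIndex (Position) then averages them
    top3 = available.groupby(["Position"])["Remaining"].nlargest(3)
    means = top3.groupby(level=0).mean()  # RB -> 110.0, WR -> 75.0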