-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
61 lines (51 loc) · 3.12 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import streamlit as st
import plotly.express as px
# deployment instructions: https://towardsdatascience.com/from-streamlit-to-heroku-62a655b7319
@st.cache
def get_data():
    """Read the Inside Airbnb listings CSV for Vienna into a DataFrame.

    The URL points at a fixed snapshot (2020-06-16). The function is
    decorated with ``st.cache``, Streamlit's memoizing decorator.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that snapshot.
    """
    listings_url = "http://data.insideairbnb.com/austria/vienna/vienna/2020-06-16/visualisations/listings.csv"
    return pd.read_csv(listings_url)
# Load the listings once; the sections of this script all read from `df`.
df = get_data()

st.title("Vienna AirBnB Analysis")

# --- Sights picker: one photo per landmark, chosen from a drop-down ---
st.header("Visit these top sights in Vienna")
# Maps the label shown in the drop-down to the image URL to display.
pics = {
    "St. Stephan's Cathedral": "https://images.unsplash.com/photo-1516550893923-42d28e5677af?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1652&q=80",
    "Schoenbrunn Gardens": "https://images.unsplash.com/photo-1588836807555-ec6dfa2fefd2?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1567&q=80",
    "Castle Belvedere": "https://images.unsplash.com/photo-1526581671404-349f224db79b?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=800&q=60",
}
# The first entry is pre-selected.
pic = st.selectbox("Choose Sight of Vienna", list(pics), index=0)
sight_url = pics[pic]
# NOTE(review): the caption is the image URL itself, not the sight name --
# confirm this is intended (e.g. as attribution).
st.image(sight_url, caption=sight_url, use_column_width=True)
# --- Table overview: a preview of the listings with selectable columns ---
st.subheader("Table overview available AirBnBs in Vienna")
# The table below is the first 20 rows in file order (no sorting by price),
# so the description must not promise a "top five most expensive" ranking.
st.markdown("Following are the first 20 listings. Use the selector below to choose which columns are shown.")
# Columns shown until the user changes the selection.
defaultcols = ["name", "host_name", "neighbourhood", "room_type", "price"]
cols = st.multiselect("Columns", df.columns.tolist(), default=defaultcols)
st.dataframe(df[cols].head(20))
# --- Map of the expensive listings ---
st.header("Where are the most expensive properties located?")
st.subheader("On a map")
# NOTE(review): the 800 threshold is hard-coded; the "top 1%" wording in the
# text is not computed from the data -- confirm it still holds.
st.markdown("The following map shows the top 1% most expensive Airbnbs priced at $800 and above.")
# Keep only the coordinates and drop rows missing either one.
expensive_coords = df.query("price>=800")[["latitude", "longitude"]].dropna(how="any")
st.map(expensive_coords)

# --- Mean price per room type, highest first ---
st.header("Average price by room type")
mean_by_room = df.groupby("room_type").price.mean().reset_index()
mean_by_room = mean_by_room.round(2).sort_values("price", ascending=False)
# Swap the numeric "price" column for a two-decimal string column "avg_price".
mean_by_room = mean_by_room.assign(
    avg_price=lambda frame: frame.pop("price").apply(lambda amount: "%.2f" % amount)
)
st.table(mean_by_room)
# --- Hosts with the most listings ---
st.header("Which host has the most properties listed?")
# value_counts() sorts by count, descending, so positions 0 and 1 are the
# host ids with the most and second-most listings.
listingcounts = df.host_id.value_counts()
top_host_1 = df.query('host_id==@listingcounts.index[0]')
top_host_2 = df.query('host_id==@listingcounts.index[1]')
# Each subset holds only rows for one host_id, so its first row is enough to
# read the host name. Reading row 1 of the second subset (as before) raised
# IndexError whenever that host had a single listing.
st.write(f"""**{top_host_1.iloc[0].host_name}** is at the top with {listingcounts.iloc[0]} property listings.
**{top_host_2.iloc[0].host_name}** is second with {listingcounts.iloc[1]} listings. """)
# --- Histogram of prices inside a user-chosen range ---
st.header("What is the distribution of property price?")
# Sidebar range slider: from the lowest price up to the highest price capped
# at 1000, with (50, 300) selected initially. Returns a (low, high) tuple.
values = st.sidebar.slider("Price range", float(df.price.min()), float(df.price.clip(upper=1000.).max()), (50., 300.))
# Filter with a boolean mask instead of splicing the tuple's repr into a
# query string; Series.between is inclusive on both ends, same as before.
in_range = df.price.between(values[0], values[1])
f = px.histogram(df[in_range], x="price", nbins=15, title="Price distribution")
f.update_xaxes(title="Price")
f.update_yaxes(title="No. of listings")
st.plotly_chart(f)
# --- Listings whose review count falls inside a user-entered range ---
st.header("Properties by number of reviews")
st.write("Enter a range of numbers in the sidebar to view properties whose review count falls in that range.")
# Review counts are whole numbers, so the inputs are integers (the widget
# then steps by 1 instead of 0.01).
minimum = st.sidebar.number_input("Minimum", min_value=0)
maximum = st.sidebar.number_input("Maximum", min_value=0, value=5)
if minimum > maximum:
    st.error("Please enter a valid range")
else:
    review_cols = ["name", "number_of_reviews", "neighbourhood", "host_name", "room_type", "price"]
    # Up to 50 matching listings, most-reviewed first.
    matches = (
        df.query("@minimum<=number_of_reviews<=@maximum")
        .sort_values("number_of_reviews", ascending=False)
        .head(50)[review_cols]
    )
    # Render explicitly rather than relying on Streamlit "magic" to display
    # a bare expression, which does nothing when magic is disabled.
    st.write(matches)