import flickrapi
import json
import time
import pandas
#Flickr API Key/Secret
API_KEY = 'API KEY'
API_SECRET = 'API SECRET'
# create the API class
flickr = flickrapi.FlickrAPI(API_KEY, API_SECRET, cache=True)
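# NOTE: the two strings above are placeholders; replace them with your own Flickr API
# key and secret before running (cache=True enables flickrapi's built-in response cache).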
# PhotoCollector class; the input must be the photo iterator returned by flickr.walk
class PhotoCollector:
def __init__(self, flickrwalk):
        # the flickr.walk iterator is the only attribute
self.flickrwalk = flickrwalk
    # Walks the flickr.walk results and collects the photo IDs and owner (user) IDs
def flickr_walk(self):
        # empty lists to collect the photo IDs and the owner (user profile) IDs
        ph_id_list = []
        per_id_list = []
        # loop over the photos yielded by flickr.walk
        for i, photo_url in enumerate(self.flickrwalk):
            if i > 500: # only collect roughly the first 500 photos
break
# get the photo url
url = photo_url.get('url_c')
if url is not None:
                # if the photo has a url, record its photo ID and owner ID in the two lists
ph_id_list.append(photo_url.get('id'))
per_id_list.append(photo_url.get('owner'))
#return the lists so that the values are used in the Photo class.
return ph_id_list, per_id_list
class Photo:
def __init__(self, ph_id_list, per_id_list):
self.ph_id_list = ph_id_list
self.per_id_list = per_id_list
def attributes(self):
#create lists
#lists are blank but will be added through the loops.
uid_list = []
rname_list = []
phtitle_list = []
phdesc_list = []
phurl_list = []
lat_list = []
long_list = []
tag_list = []
date_list = []
regloc_list = []
count_list = []
uname_list = []
loc_list = []
prof_list = []
        for i in self.ph_id_list: # loop over the collected photo IDs
#gets info from the individual photo
photo_info = flickr.photos.getInfo(photo_id= i, format='json')
            # the API returns bytes; decode to a str
            photo_info_decode = photo_info.decode()
            # parse the JSON string into a dict
photo_info_load = json.loads(photo_info_decode)
# USERNAME attribute
uname = photo_info_load["photo"]["owner"]["username"]
uname_list.append(uname)
# REALNAME attribute
rname = photo_info_load["photo"]["owner"]["realname"]
rname_list.append(rname)
# PHOTO TITLE attribute
phtitle = photo_info_load["photo"]["title"]["_content"]
phtitle_list.append(phtitle)
# PHOTO DESCRIPTION attribute
phdesc = photo_info_load["photo"]["description"]["_content"]
phdesc_list.append(phdesc)
# PHOTO URL attribute
phurl = photo_info_load["photo"]["urls"]["url"]
            for u in phurl: # the url is stored as a dict nested in a list [{}]; this loop digs it out and appends it to the list
for k,item in u.items():
if k == "_content":
phurl_list.append(item)
# PHOTO COORDINATES attribute
ph_lat = photo_info_load["photo"]["location"]["latitude"]
lat_list.append(ph_lat)
ph_long = photo_info_load["photo"]["location"]["longitude"]
long_list.append(ph_long)
# PHOTO TAGS attribute
tags = photo_info_load["photo"]["tags"]["tag"]
raw_tag = []
            for t in tags: # tags are stored as dicts nested in a list [{}]; this loop digs out each raw tag value
for k,item in t.items():
if k == "raw":
raw_tag.append(item)
tag_list.append(raw_tag)
# PHOTO TAKEN TIME attribute
date = photo_info_load["photo"]["dates"]["taken"]
date_list.append(date)
# PHOTO REGISTERED LOCATION attribute
regloc = photo_info_load["photo"]["owner"]["location"]
regloc_list.append(regloc)
        for i in self.per_id_list: # loop over the collected owner IDs to get profile information
#gets information from the person's profile
people_info = flickr.people.getInfo(user_id = i, format = 'json')
            # the API returns bytes; decode to a str
            people_info_decode = people_info.decode()
            # parse the JSON string into a dict
people_info_load = json.loads(people_info_decode)
# PHOTOS COUNT attribute
count = people_info_load["person"]["photos"]["count"]["_content"]
count_list.append(count)
# USER LOCATION attribute
loc= people_info_load["person"]
if "location" in loc: #some profile's do not share their location information, this queries if the user has one or not.
loc = people_info_load["person"]["location"]["_content"] #if they have location information, it is added
loc_list.append(loc)
else:
loc = "" #if they do not have location information, it will be left blank.
loc_list.append(loc)
# PROFILE LINK attribute
prof = people_info_load["person"]["profileurl"]["_content"]
prof_list.append(prof)
#using pandas to create a data frame that can be converted to a csv
        # build the frame from a dictionary whose keys become column names and whose values are the lists filled in the loops above
df = pandas.DataFrame(data={"Photo ID" : self.ph_id_list, "Person ID" : self.per_id_list, "Username" : uname_list, "Realname" : rname_list, "Photo Title" : phtitle_list, "Photo Description" : phdesc_list, "Photo URL" : phurl_list, "Latitude" : lat_list,
"Longitude" : long_list, "Date/Time Taken": date_list, "Photo Tags" : tag_list, "Photo Registered Location" : regloc_list, "Profile Living Location" : loc_list,
"Profile URL" : prof_list, "Photos Count" : count_list})
        # join each photo's tag list into a single string so the CSV column has no list brackets
df["Photo Tags"] = df["Photo Tags"].apply(" , ".join)
print("\nTotal points found: ",len(df.index))
print ("\nCollection of Flickr photos has been completed.")
return df
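# Note: attributes() makes two API calls per photo (photos.getInfo plus people.getInfo for
# the owner), so a run over several hundred photos can take a while; if Flickr rate limits
# ever become a problem, adding a short time.sleep() inside the loops is one possible
# mitigation (the time module is already imported above).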
# photos is a flickr.walk generator over photos matching the search criteria: geotagged within 5 km of the United Center, taken 2010-2019, and tagged "Bulls"
photos = flickr.walk(lat=41.8806908, lon=-87.6763646, radius=5, min_taken_date= '2010-01-01', max_taken_date= '2019-12-31', extras='url_c', tags = "Bulls")
# pass the walk results into the PhotoCollector class
pc = PhotoCollector(photos)
# run flickr_walk() to collect the photo and owner IDs
results = pc.flickr_walk()
# the returned lists are stored separately so they can be passed to the Photo class
ph_id_list = results[0]
per_id_list = results[1]
# bring the lists into the Photo class
p = Photo(ph_id_list, per_id_list)
# execute the attributes() method of the Photo class
unitedcenter = p.attributes()
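# attributes() returns a pandas DataFrame, so it can be written straight to disk if a CSV
# copy is wanted; the file name below is only an example.
# unitedcenter.to_csv("unitedcenter_bulls.csv", index=False)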
import descartes #descartes needed in order to create the map
import geopandas as gpd
import matplotlib.patches as patches
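# plotMap overlays the collected photo points and the location of interest on the Chicago
# city boundary; data is a DataFrame with Latitude/Longitude columns and lat/long are the
# coordinates of the location of interest.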
def plotMap(data, lat, long):
frame = pandas.DataFrame({'Latitude': [lat],'Longitude':[long]})
loc = gpd.GeoDataFrame(frame, geometry=gpd.points_from_xy(frame.Longitude, frame.Latitude))
points = gpd.GeoDataFrame(data, geometry=gpd.points_from_xy(data.Longitude, data.Latitude))
cookcounty = gpd.read_file("shp/Cook_County.shp")
chi_boundary = gpd.read_file("shp/Boundaries - City.geojson")
    # the photo coordinates are WGS84 latitude/longitude, so label every layer with EPSG:4326
    # (the boundary files appear to use the same lat/long coordinates, since the layers line up)
    loc.crs = {'init': 'epsg:4326'}
    points.crs = {'init': 'epsg:4326'}
    cookcounty.crs = {'init': 'epsg:4326'}
    chi_boundary.crs = {'init': 'epsg:4326'}
base = chi_boundary.plot(color = 'black', edgecolor = 'white', figsize=(15, 15))
base.set_facecolor('black')
palette = {'Collected Points':'#7b3294', 'Location of Interest':'#008837'}
list_of_items = ['Collected Points','Location of Interest']
items_list =[]
for i in list_of_items:
label = i
color = palette[i]
items_list.append(patches.Patch(facecolor=color, label = label,alpha=0.9))
points.plot(ax=base, marker='o', color = '#7b3294', markersize = 2)
loc.plot(ax=base, marker='o', color = '#008837', markersize = 25)
base.legend(handles = items_list, fontsize=15)
plotMap(unitedcenter, 41.8806908, -87.6763646)
photos = flickr.walk(lat=41.948164, lon=-87.655798, radius=5, min_taken_date= '2010-01-01', max_taken_date= '2019-12-31', extras='url_c', tags = "Cubs")
pc = PhotoCollector(photos)
results = pc.flickr_walk()
ph_id_list = results[0]
per_id_list = results[1]
p = Photo(ph_id_list, per_id_list)
wrigleyfield = p.attributes()
plotMap(wrigleyfield, 41.948164, -87.655798)
photos = flickr.walk(lat=41.862188, lon=-87.616690, radius=5, min_taken_date= '2010-01-01', max_taken_date= '2019-12-31', extras='url_c', tags = "Bears")
pc = PhotoCollector(photos)
results = pc.flickr_walk()
ph_id_list = results[0]
per_id_list = results[1]
p = Photo(ph_id_list, per_id_list)
soldierfield = p.attributes()
plotMap(soldierfield, 41.862188, -87.616690)
photos = flickr.walk(lat=41.830509, lon=-87.6335052, radius=5, min_taken_date= '2010-01-01', max_taken_date= '2019-12-31', extras='url_c', tags = "Sox")
pc = PhotoCollector(photos)
results = pc.flickr_walk()
ph_id_list = results[0]
per_id_list = results[1]
p = Photo(ph_id_list, per_id_list)
g_rates = p.attributes()
plotMap(g_rates, 41.830509, -87.6335052)
photos = flickr.walk(lat=41.8806908, lon=-87.6763646, radius=5, min_taken_date= '2010-01-01', max_taken_date= '2019-12-31', extras='url_c', tags = "Blackhawks")
pc = PhotoCollector(photos)
results = pc.flickr_walk()
ph_id_list = results[0]
per_id_list = results[1]
p = Photo(ph_id_list, per_id_list)
uc_hawks = p.attributes()
plotMap(uc_hawks, 41.8806908, -87.6763646)
unitedcenter['Team'] = 'Bulls'
wrigleyfield['Team'] = 'Cubs'
soldierfield['Team'] = 'Bears'
g_rates['Team'] = 'White Sox'
uc_hawks['Team'] = 'Blackhawks'
frames = [unitedcenter, wrigleyfield, soldierfield, g_rates, uc_hawks]
combined = pandas.concat(frames)
combined = gpd.GeoDataFrame(combined, geometry=gpd.points_from_xy(combined.Longitude, combined.Latitude))
combined.crs = {'init': 'epsg:4326'} # lat/long coordinates (WGS84)
chi_boundary = gpd.read_file("shp/Boundaries - City.geojson")
chi_boundary.crs = {'init': 'epsg:4326'}
base = chi_boundary.plot(color = 'black', edgecolor = 'white', figsize=(18, 18)) # plot the Chicago city boundary in black as the base of the map
base.set_facecolor('black')
pointsPalette = {'Bulls':'#a6cee3', 'Cubs':'#1f78b4','Bears':'#b2df8a','White Sox':'#33a02c','Blackhawks':'#fb9a99'}
list_of_teams = ['Bulls','Cubs','Bears','White Sox','Blackhawks']
team_list =[]
for t in list_of_teams:
label = t
color = pointsPalette[t]
team_list.append(patches.Patch(facecolor=color, label = label,alpha=0.9))
for team, data in combined.groupby('Team'):
color = pointsPalette[team]
label = team
data.plot(color = color, ax = base, label = label, markersize = 5)
base.legend(handles = team_list, fontsize=15)
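# Optional: save the combined map to disk (the file name is only illustrative).
# base.figure.savefig("chicago_team_photos.png", dpi=150, bbox_inches="tight")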
from sklearn.cluster import KMeans
import numpy as np
from matplotlib import pyplot as plt
xy_list = []
for index, row in combined.iterrows():
    lat = float(row["Latitude"])    # latitude from the combined data frame
    long = float(row["Longitude"])  # longitude from the combined data frame
    xy_list.append((long, lat))     # store each point as an (x, y) tuple
attributes = pandas.DataFrame(xy_list)
attributes.columns = ['X','Y']
model = KMeans(n_clusters=5, max_iter = 500)
model.fit(attributes)
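# model.labels_ holds the cluster index (0-4) assigned to each point by KMeans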
print(model.labels_)
plt.figure(figsize=(18,18))
colormap = np.array(['#a6cee3', '#1f78b4', '#b2df8a','#33a02c','#fb9a99'])
plt.scatter(attributes.X, attributes.Y, c=colormap[model.labels_], s=30)
plt.title('K-Mean Clustering - 5 Clusters')
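# A quick way to sanity-check the choice of five clusters is an "elbow" plot of the KMeans
# inertia over a range of k values; this is an optional sketch, not part of the analysis above.
# inertias = [KMeans(n_clusters=k, max_iter=500).fit(attributes).inertia_ for k in range(1, 11)]
# plt.figure()
# plt.plot(range(1, 11), inertias, marker='o')
# plt.xlabel('k'); plt.ylabel('inertia')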
# export this notebook to a standalone HTML file
import os
os.system('jupyter nbconvert --to html FlickrAPI.ipynb')