import pandas as pd
import numpy as np
import geopandas


# Read the variant-sequencing CSV through GeoPandas, then parse the 'date'
# column into real datetimes so rows can be filtered by day later on.
covid_cases = geopandas.read_file('covid-variants.csv')
covid_cases = covid_cases.assign(date=pd.to_datetime(covid_cases['date']))

/home/thedefect/anaconda3/envs/geotest/lib/python3.9/site-packages/geopandas/geodataframe.py:600: RuntimeWarning: Sequential read of iterator was interrupted. Resetting iterator. This can negatively impact the performance.
  for feature in features_lst:


# Keep only the Omicron rows, then narrow to one day to keep the first map simple.
omicron_cases = covid_cases.loc[covid_cases['variant'] == 'Omicron']
single_day = omicron_cases.loc[omicron_cases.date == '2021-12-27']


# Geocode every location name into a point geometry.
country_locations = geopandas.tools.geocode(single_day['location'])

# Sanity check (displayed as the cell result): the geocoded frame must share
# the index of the rows it was built from, or the later assignment would misalign.
single_day.index.equals(country_locations.index)

True


# Use the geocoded points as this frame's geometry column.
single_day = single_day.assign(geometry=country_locations.geometry)

# The count columns must be integers before sorting by them makes sense.
fixed_day = single_day.astype(
    {column: 'int64' for column in ('num_sequences', 'num_sequences_total')}
)


# Sort by sequence count and drop the 'date' column before mapping.
one_day_omicron = fixed_day.sort_values('num_sequences').drop(columns='date')

one_day_omicron.explore(
    column='num_sequences',        # column mapped onto the colour gradient
    tooltip='location',            # text shown on hover
    popup='num_sequences',         # text shown on click
    marker_type='circle_marker',   # marker shape ...
    marker_kwds=dict(radius=10),   # ... and its size
)


# The GeoPandas geocode call picked the wrong match for Morocco, Switzerland, and Georgia, so we correct those three manually.

from geopy.geocoders import Photon
from shapely.geometry import Point

photon_fixer = Photon()  # geocoder client that issues the lookup requests

# With 'exactly_one=False' every lookup returns a list of candidates; the index
# paired with each name below selects the candidate that is the country itself.
Morocco, Switzerland, Georgia = (
    photon_fixer.geocode(place, exactly_one=False, language='en')[candidate]
    for place, candidate in (("Morocco", 0), ("Switzerland", 1), ("Georgia", 2))
)


def fix_geometry(gdf, points):
    """Overwrite the geometry of selected rows with manually geocoded points.

    The frame is modified in place and nothing is returned.

    Args:
        gdf: Frame with a ``location`` column and a ``geometry`` column.
        points: Iterable of geocoder results. Each must expose ``latitude``
            and ``longitude`` and render via ``str()`` as a comma-separated
            address whose first component is the name to match against
            ``gdf.location``.
    """
    for point in points:
        # The first address component is the place name, e.g. "Georgia".
        place_name = str(point).split(', ')[0]
        # shapely's Point takes (x, y), i.e. (longitude, latitude). Passing
        # latitude first would put the marker at transposed coordinates.
        corrected = Point(point.longitude, point.latitude)
        gdf.loc[gdf.location == place_name, ['geometry']] = corrected

# Replace the three mis-geocoded rows in place with the Photon results.
fix_geometry(fixed_day, [Morocco, Switzerland, Georgia])


# Re-sort and re-plot now that the geometry has been overridden.

cases_sorted = fixed_day.sort_values('num_sequences').drop(columns='date')

cases_sorted.explore(
    column='num_sequences',
    tooltip='location',
    popup='num_sequences',
    marker_type='circle_marker',
    marker_kwds=dict(radius=10),
)


# Same process as before, just not restricting on a single day.

# Cast the total-count column to integers so it can be summed.
fixed_sequence_type = omicron_cases.astype({'num_sequences_total': 'int64'})

# Sum the totals over every date, one row per location, with 'location'
# restored as an ordinary column.
total_country_cases = (
    fixed_sequence_type.loc[:, ['location', 'num_sequences_total']]
    .groupby('location')
    .sum()
    .reset_index()
)


total_locations = geopandas.tools.geocode(total_country_cases.location)

# The aggregated frame has no geometry column, so recent GeoPandas releases
# hand it back as a plain pandas DataFrame, which has neither .explore() nor
# .set_geometry(). Build the GeoDataFrame explicitly; this is a no-op on
# versions where the frame is already a GeoDataFrame.
total_country_cases = geopandas.GeoDataFrame(
    total_country_cases.assign(geometry=total_locations['geometry']),
    geometry='geometry',
)


# The same three countries are mis-geocoded here, so apply the same overrides.
fix_geometry(total_country_cases, [Morocco, Switzerland, Georgia])

total_country_cases.explore(column='num_sequences_total',
                     tooltip='location',
                     popup='num_sequences_total',
                     marker_type='circle_marker',
                     marker_kwds={'radius':10})


# Low-resolution Natural Earth country polygons that ship with GeoPandas.
# NOTE: geopandas.datasets was deprecated in GeoPandas 0.14 and removed in 1.0;
# on newer releases the dataset has to be obtained separately.
world_path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(world_path)
world.head()


import pycountry

# Look-up table from pycountry's country names to their alpha-3 codes.
mapping = {entry.name: entry.alpha_3 for entry in pycountry.countries}
# Names missing from the table are left unchanged by replace().
total_country_cases['country_codes'] = total_country_cases['location'].replace(mapping)


# GeoPandas can hold several geometry columns per row and switch which one is
# active. Both inputs have a 'geometry' column, so the merge suffixes them:
# 'geometry_x' holds the geocoded points and 'geometry_y' the country polygons.
# The default inner join drops rows whose code has no match in 'iso_a3'.
combined_df = (
    total_country_cases
    .merge(world, left_on='country_codes', right_on='iso_a3')
    .set_geometry('geometry_y')
)
combined_df.head()


# Map the per-country totals using whichever geometry column is active on combined_df.
# NOTE(review): the marker_* arguments are carried over from the point maps;
# presumably they have no effect on polygon geometries — confirm.
combined_df.explore(
    column='num_sequences_total',
    tooltip='location',
    popup='num_sequences_total',
    marker_type='circle_marker',
    marker_kwds=dict(radius=10),
)

	pop_est	continent	name	iso_a3	gdp_md_est	geometry
0	920938	Oceania	Fiji	FJI	8374.0	MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1	53950935	Africa	Tanzania	TZA	150600.0	POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2	603253	Africa	W. Sahara	ESH	906.5	POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3	35623680	North America	Canada	CAN	1674000.0	MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4	326625791	North America	United States of America	USA	18560000.0	MULTIPOLYGON (((-122.84000 49.00000, -120.0000...

	location	num_sequences_total	geometry_x	country_codes	pop_est	continent	name	iso_a3	gdp_md_est	geometry_y
0	Angola	1055	POINT (17.56912 -11.87758)	AGO	29310273	Africa	Angola	AGO	189000.0	MULTIPOLYGON (((12.99552 -4.78110, 12.63161 -4...
1	Argentina	8411	POINT (-64.96728 -34.99650)	ARG	44293293	South America	Argentina	ARG	879400.0	MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...
2	Australia	47199	POINT (134.75500 -24.77611)	AUS	23232413	Oceania	Australia	AUS	1189000.0	MULTIPOLYGON (((147.68926 -40.80826, 148.28907...
3	Austria	12580	POINT (14.12456 47.59397)	AUT	8754413	Europe	Austria	AUT	416600.0	POLYGON ((16.97967 48.12350, 16.90375 47.71487...
4	Bangladesh	3700	POINT (90.29344 24.47693)	BGD	157826578	Asia	Bangladesh	BGD	628400.0	POLYGON ((92.67272 22.04124, 92.65226 21.32405...

Using GeoPandas to Plot and Manipulate Geographic Data

What Is It and Why?

Importing Data

Summing Up

Limitations

What Should I Use It For, Then?

Thanks!

Works Referenced