# Python code used for this research
####################################################
## PART 1: Install and import necessary libraries ##
####################################################
!pip install geoplot
import pandas as pd
import seaborn as sns
import plotly.express as px
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from google.colab import files
import zipfile
import io
# Note: Statistics Canada's definition of "recent immigrants" is those that migrated to Canada within the last 5 years of a census year (e.g., 2016-2021, 2011-2016, 2006-2011, etc.)
###############################
## PART 2: Import csv files ##
##############################
# --- Inputs for the time-series analysis ---
# Total number of non-permanent residents by province and territory
NPR_ProvTerr = pd.read_csv("Non-Permanent Migration.csv")
# Quarterly international migration by province and territory
IntMig = pd.read_csv(
    "Estimates of the components of international migration, quarterly.csv"
)
# Census-by-census immigration data for the top 10 countries of origin in 2020
Top10Mig = pd.read_csv("Migration Timeline of Top 10 Countries from 2020.csv")

# --- Inputs for the categorical data analysis (2021 Census) ---
# % of recent migrants that identify as women, by country of origin and by
# province and territory; '#DIV/0!' cells are read as missing values
GenderMig = pd.read_csv(
    "Migration Gender Breakdown.csv",
    na_values=['#DIV/0!'],
)
# Each province's top 5 countries of origin among recent immigrants
Top5 = pd.read_csv("Top 5 by Province & Territory.csv")
# Age breakdown of the immigrant population vs. the general population,
# by province and territory
Age = pd.read_csv("Migration Age Analysis.csv")

# --- Inputs for the scatter plot analysis ---
# Top 25 Census Metropolitan Areas (2021 Census) by population.
# Note: there will be 26 entries for the scatter plot analysis, because the
# Ottawa-Gatineau CMA is split between Ontario and Quebec.
# Recent immigrants by census metropolitan area
RecImm_CMA = pd.read_csv("Recent Immigrants CMA.csv")
# Total non-permanent residents by census metropolitan area
NPR_CMA = pd.read_csv("Non-Permanent Residents CMA.csv")
# % growth rate of each CMA and of the immigrant population in each CMA
GrowthCMA = pd.read_csv("CMA vs. Immigrant Growth Rate.csv")
##################################
## PART 3: Time-Series Analysis ##
##################################
# It's good practice to check the dataframe's head (first few rows) and tail (last few rows)
## Plots 1 and 2 share the same columns, axis labels and COVID-19 highlight, ##
## so the common pieces are defined once and reused for both charts.        ##

# Columns of the quarterly tables that are drawn as one area each.
PROVINCES_AND_TERRITORIES = [
    'Alberta', 'British Columbia', 'Manitoba', 'New Brunswick',
    'Newfoundland and Labrador', 'Northwest Territories', 'Nova Scotia',
    'Nunavut', 'Ontario', 'Prince Edward Island', 'Quebec',
    'Saskatchewan', 'Yukon',
]


def _quarterly_area_chart(data, title, yaxis_title):
    """Build an area chart of the province/territory columns by quarter.

    Parameters:
        data: DataFrame with a 'QUARTER' column and one column per entry of
            PROVINCES_AND_TERRITORIES.
        title: Chart title.
        yaxis_title: Label for the y axis.

    Returns:
        The plotly figure, with the span from "Q2 2020" to "Q3 2021" shaded
        and labelled as the first three COVID-19 waves. The figure is not
        shown; the caller decides when to display it.
    """
    chart = px.area(data, x='QUARTER', y=PROVINCES_AND_TERRITORIES)
    chart.update_layout(
        title=title,
        xaxis_title="Quarter",
        yaxis_title=yaxis_title,
        legend_title="Province or Territory",
    )
    chart.add_vrect(
        x0="Q2 2020",
        x1="Q3 2021",
        label=dict(
            text="COVID-19 First 3 Waves",
            textposition="top center",
            font=dict(size=20, family="Times New Roman"),
        ),
        fillcolor="green",
        opacity=0.25,
        line_width=0,
    )
    return chart


## Plot 1: Total Non-Permanent Residents by Province and Territory, Quarterly, Since 2001 ##
# In a notebook, head()/tail() only render when they are the last expression
# of a cell, so run them in their own cells to inspect the data.
NPR_ProvTerr.head()
NPR_ProvTerr.tail()
fig = _quarterly_area_chart(
    NPR_ProvTerr,
    title="Total Number of Non-Permanent Residents in Canada",
    yaxis_title="Non-Permanent Residents",
)
fig.show()

## Plot 2: Quarterly International Migration by Province and Territory, Quarterly, Since 2001 ##
IntMig.head()
IntMig.tail()
fig2 = _quarterly_area_chart(
    IntMig,
    title="Quarterly Immigration",
    yaxis_title="Net International Immigration",
)
fig2.show()
## Plot 3: Census-by-Census Breakdown of Countries of Origin of Recent Immigrants Since 2001 ##
# The 10 countries shown are the top 10 countries of origin for people who
# immigrated to Canada in 2020.
Top10Mig.head()
Top10Mig.tail()

# One line per country, with the census year on the x axis.
fig3 = px.line(
    Top10Mig,
    x='YEAR',
    y='RECENT IMMIGRANTS',
    color='COUNTRY',
    width=800,
    height=600,
)
fig3_layout = {
    "title": "Recent Immigrants by Country",
    "xaxis_title": "Census Year",
    "yaxis_title": "Number of Recent Immigrants",
    "legend_title": "Country of Origin",
}
fig3.update_layout(**fig3_layout)

# Put ticks only on the census years present in the analysis.
census_years = [2001, 2006, 2011, 2016, 2021]
fig3.update_xaxes(
    tickvals=census_years,
    ticktext=[str(year) for year in census_years],
)
fig3.show()
#############################################################################
## PART 4: Categorical Data Analysis, Recent Immigrants as per 2021 Census ##
#############################################################################
## Plot 1: Gender Analysis ##
# Compares the gender breakdown of recent immigrants with the general
# population, nationwide and for each province and territory.
GenderMig.head()
GenderMig.tail()

# Share of women (on a 0-1 scale) in the general population of each geography.
# It is drawn as the dashed reference line on that geography's chart.
# The dictionary order is the order in which the charts are produced.
WOMEN_SHARE_GENERAL_POPULATION = {
    "Canada": 0.5072921,
    "Alberta": 0.5007935,
    "British Columbia": 0.5085835,
    "Manitoba": 0.5034162,
    "New Brunswick": 0.5081871,
    "Newfoundland and Labrador": 0.5101851,
    "Northwest Territories": 0.4925737,
    "Nova Scotia": 0.5133694,
    "Nunavut": 0.4908425,
    "Ontario": 0.5099208,
    "Prince Edward Island": 0.5115337,
    "Quebec": 0.5057579,
    "Saskatchewan": 0.5027660,
    "Yukon": 0.5003729,
}


def _gender_breakdown_chart(region, women_share):
    """Build the bar chart of % women among recent immigrants for one region.

    Parameters:
        region: Geography name; the chart reads the GenderMig column
            '% Women-<region>'.
        women_share: Share of women (0-1) in the region's general population,
            drawn as a dashed horizontal line.

    Returns:
        The plotly figure (not yet shown).
    """
    chart = px.bar(GenderMig, x='COUNTRY OF ORIGIN', y=f'% Women-{region}')
    chart.add_hline(
        y=women_share,
        annotation_text=f"% Women in {region}",
        line_dash="dash",
    )
    chart.update_layout(
        title=f"Gendered Analysis of International Migration to {region}",
        xaxis_title="Country of Origin",
        yaxis_title="% Women (0-1)",
    )
    return chart


# One chart per geography; the dashed line is the % women in the general population.
gender_figures = []
for region, women_share in WOMEN_SHARE_GENERAL_POPULATION.items():
    region_chart = _gender_breakdown_chart(region, women_share)
    region_chart.show()
    gender_figures.append(region_chart)

# Keep the individual figure names bound so any other cell that refers to
# fig4_1 ... fig4_14 continues to work.
(fig4_1, fig4_2, fig4_3, fig4_4, fig4_5, fig4_6, fig4_7,
 fig4_8, fig4_9, fig4_10, fig4_11, fig4_12, fig4_13, fig4_14) = gender_figures
## Plot 2: Top 5 Countries of Origin of Recent Immigrants, by Provinces and Territories ##
Top5.head()
Top5.tail()

# Stacked bars: one bar per province/territory, one segment per country,
# with the country name printed on its segment.
fig5 = px.bar(
    Top5,
    x='Province/Territory',
    y='% of All Migrants (2016-21)',
    color='Country of Origin',
    text='Country of Origin',
    barmode='stack',
)
fig5.update_layout(title="Breakdown of Country of Origin by Province/Territory")
fig5.show()
## Plot 3: Age-Based Analysis ##
# Compares the age breakdown of the recent immigrant population with the
# general population, with one facet (column) per geographic area.
Age.head()
Age.tail()
fig6 = sns.catplot(
    data=Age,
    x='Age Group',
    y='Percentage',
    hue='Category',
    col='Geographic Area',
    kind='bar',
)
# Use the grid's `figure` property; `fig` is a deprecated alias for it.
fig6.figure.suptitle("Age Distribution of General and Immigrant Population by Geographic Area")
fig6.set_axis_labels("Age Group", "Percentage")
fig6.set_titles("Age Distribution for {col_name}")
# Rotate the age-group labels on every facet so they do not run into each other.
for axes in fig6.axes.flat:
    axes.tick_params(axis='x', labelrotation=45)
# Reserve room at the top so the figure-level title does not sit on top of
# the per-facet titles.
fig6.figure.subplots_adjust(top=0.85)
plt.show()
#############################################################################
## PART 5: Scatter Plot Analysis, Top 25 Census Metropolitan Areas (2021) ##
#############################################################################
## Plot 1: Non-Permanent Residents Population per 100,000 vs. CMA population ##
NPR_CMA.head()
NPR_CMA.tail()

# Scatter of CMA population against per-capita NPRs, coloured by region,
# with an ordinary-least-squares trendline.
fig7 = px.scatter(
    NPR_CMA,
    x="POP (2021)",
    y="PER CAPITA",
    color="REGION",
    trendline="ols",
)
fig7.update_layout(title="CMA Population vs. Non-Permanent Residents Per 100,000")
fig7.show()
## Plot 2: Comparing Growth Rate of Immigrant Population vs. General Population, 2016-21 ##
GrowthCMA.head()
GrowthCMA.tail()

# Scatter of each CMA's overall growth against the growth of its
# foreign-born population, coloured by region, with an OLS trendline.
fig8 = px.scatter(
    GrowthCMA,
    x="% Change from 2016",
    y="% Change from 2016 (Foreign-Born)",
    color="REGION",
    trendline="ols",
)
fig8.update_layout(title="Growth Rates of CMAs vs. CMAs' Foreign-Born Population")
fig8.show()
## Plot 3: Recent Immigrant Population Per 100,000 (by Continent of Origin) vs. General Population ##
RecImm_CMA.head()
RecImm_CMA.tail()

# One scatter facet per continent of origin, points coloured by region.
fig9 = sns.relplot(
    RecImm_CMA,
    x='POPULATION (2021)',
    y='RECENT IMMIGRANTS PER-CAPITA',
    hue='REGION',
    col='RECENT IMMIGRANTS-CONTINENT OF ORIGIN',
    kind='scatter',
)
fig9.set_axis_labels("CMA Population (millions)", "Recent Immigrants Per 100,000")
fig9.set_titles("Per-Capita Recent Immigrants {col_name}")
# Tilt the x tick labels on every facet.
for facet_axes in fig9.axes.flat:
    facet_axes.tick_params(axis='x', labelrotation=45)
plt.show()
#####################################################################################
## PART 6: Map Visualization, Non-Permanent Residents Growth Rate (Q3 2022-Q3 2024) ##
#####################################################################################
# The growth of NPRs accelerated starting at the end of 2022.
# Let's map out its growth starting from Q3 2022 (July 2022).
START_QUARTER = 'Q3 2022'
END_QUARTER = 'Q3 2024'

# Keep only the two quarters being compared and index the rows by quarter, so
# each one is looked up by its label rather than by its position in the CSV.
NPR22_24 = NPR_ProvTerr[NPR_ProvTerr['QUARTER'].isin([START_QUARTER, END_QUARTER])]
NPR22_24 = NPR22_24.set_index('QUARTER')
NPR22_24

# Growth rate = [# of NPRs in Q3 2024 - # of NPRs in Q3 2022]/[# of NPRs in Q3 2022] multiplied by 100
npr_start = NPR22_24.loc[START_QUARTER]
npr_end = NPR22_24.loc[END_QUARTER]
NPR_Growth2 = ((npr_end - npr_start) / npr_start) * 100

# Take the geography names from the data's own column labels instead of a
# hand-typed list: a hand-typed list is attached by position, so a different
# column order in the CSV would silently pair provinces with the wrong rates.
NPR_Growth1 = list(NPR_Growth2.index)

# The name "PRENAME" was selected to match the geographic column name of the
# shapefile that is loaded in a later step.
NPR_Growth = pd.DataFrame({'PRENAME': NPR_Growth1, 'Growth Rate': NPR_Growth2})

# Remove the national total so only provinces and territories remain.
# NOTE(review): assumes the national column is labelled 'Canada' - confirm against the CSV.
NPR_Growth = NPR_Growth[NPR_Growth['PRENAME'] != 'Canada']
NPR_Growth
# Upload the zip file that contains the shapefile for the 2021 Census
# boundaries; `uploaded` is indexed by the uploaded file's name.
uploaded = files.upload()
SHAPEFILE_ZIP = 'lpr_000b21a_e.zip'
if SHAPEFILE_ZIP not in uploaded:
    raise FileNotFoundError(
        f"Expected an upload named {SHAPEFILE_ZIP!r}, got: {sorted(uploaded)}"
    )

# Extract all files to retrieve the shapefile; the `with` block closes the
# archive once extraction is done.
with zipfile.ZipFile(io.BytesIO(uploaded[SHAPEFILE_ZIP]), "r") as zf:
    zf.extractall()
Canada = gpd.read_file("lpr_000b21a_e.shp")

# To check the column name corresponding to the names of the provinces and territories
# It is indeed "PRENAME"
Canada.head()

# Join the shapefile and the NPR growth table on "PRENAME".
# The join matches rows by the VALUES in that column, so the names must be
# spelled identically in both tables; the row order does not matter.
CanMap = Canada.merge(NPR_Growth, on="PRENAME")

# The default merge keeps only matching rows, so report any province or
# territory that dropped out instead of leaving a silent hole in the map.
unmatched = sorted(set(Canada["PRENAME"]) - set(CanMap["PRENAME"]))
if unmatched:
    print(f"Warning: no growth rate matched for: {', '.join(unmatched)}")
CanMap.head()

# Choropleth of the growth rate, darker blue meaning faster growth.
CanMap.plot(column="Growth Rate", cmap="Blues", legend=True,
            figsize=(12, 12))
plt.title("Growth Rate of Non-Permanent Residents (Q3 2022-Q3 2024)")
plt.show()