More Pandas

Here’s the Pandas code we ended up with—see the lecture capture for details!

import pandas as pd
import numpy as np

# Load both worksheets in one pass over the workbook. Passing a list to
# sheet_name makes read_excel return a dict keyed by the requested sheet
# positions, so the file is opened and parsed once instead of twice.
sheets = pd.read_excel("municipalities.xlsx", sheet_name=[0, 2])

# First worksheet (position 0).
munis = sheets[0]

# Third worksheet (position 2), with the land-area column shortened to "Area".
areas = sheets[2].rename(columns={"Land area (km^2)": "Area"})

# Rows of munis whose 'City' value is false.
# NOTE(review): `~` assumes 'City' is a boolean column -- confirm against the sheet.
town = munis[~munis['City']]

# Join the two sheets on 'Name' (pd.merge defaults to an inner join, so only
# names present in both sheets are kept).
combined = pd.merge(munis, areas, on='Name')

# Relative population change from 2000 to 2010, as a fraction of the 2000 value.
combined['Change'] = (
    (combined['Population (2010)'] - combined['Population (2000)'])
    / combined['Population (2000)']
)

# 2010 population divided by 'Area'.
combined['Density (2010)'] = combined['Population (2010)'] / combined['Area']

# Sort by density so the fitted line below is drawn in increasing-x order.
combined = combined.sort_values(by='Density (2010)', ascending=True)

# Degree-1 least-squares fit of 'Change' against 'Density (2010)'.
# np.polyfit does not skip NaN/inf: one missing cell, zero area, or zero 2000
# population would poison the coefficients (or make the solver fail), so the
# fit uses only rows where both values are finite. On clean data this selects
# every row and the result is unchanged.
density = combined['Density (2010)']
change = combined['Change']
finite = np.isfinite(density) & np.isfinite(change)
lreg = np.polyfit(density[finite], change[finite], 1)

# Wrap the coefficients in a callable polynomial and evaluate it for every row
# (rows excluded from the fit simply get whatever f yields for their density).
f = np.poly1d(lreg)
combined['Predicted change'] = f(density)

# Scatter of the observed values, with the fitted line overlaid on the same axes.
ax = combined.plot.scatter('Density (2010)', 'Change')
combined.plot.line('Density (2010)', 'Predicted change', ax=ax, color='Red')