# BDS_firm_size.py
"""
Business Dynamics Statistics (BDS) from the US Census
Links
* http://www.census.gov/ces/dataproducts/bds/data.html
* http://www.census.gov/ces/dataproducts/bds/data_firm.html
* http://www.census.gov/ces/pdf/BDS_2013_Codebook.pdf
* http://fivethirtyeight.com/features/the-next-amazon-or-apple-or-ge-is-probably-failing-right-now/
* https://www.newyorkfed.org/medialibrary/media/research/staff_reports/sr707.pdf
Prepared for Data Bootcamp course at NYU
* http://databootcamp.nyuecon.com/
* https://github.com/NYUDataBootcamp/Materials/Code/Lab
Written by Dave Backus, February 2016
Created with Python 3.5
"""
import sys
import pandas as pd
#import matplotlib.pyplot as plt
# Report the interpreter and pandas versions this run is using.
python_version = sys.version
pandas_version = pd.__version__
print('\nPython version: ', python_version)
print('Pandas version: ', pandas_version, '\n')
#%%
"""
firm sizes
"""
# Read the firm-size release of the BDS straight from the Census web server.
url = 'http://www2.census.gov/ces/bds/firm/bds_f_sz_release.csv'
raw = pd.read_csv(filepath_or_buffer=url)

# Quick look at what arrived: the (rows, columns) shape, then each column's dtype.
dimensions = raw.shape
print('\nDataframe dimensions:', dimensions)
column_types = raw.dtypes
print('\nVariables and dtypes:\n', column_types, sep='')
#print('Firm size categories:\n', fsz['fsize'].head(12), sep='')
# clean up size labels
# http://pandas.pydata.org/pandas-docs/stable/text.html#splitting-and-replacing-strings
#raw['fsize'] = raw['fsize'].str.split(n=1).str[1]
#print('\nEdited firm size categories:\n', raw['fsize'].head(12), sep='')
"""
# exam data
years = [2008, 2009, 2010]
d13 = raw[raw['year2'].isin(years)][['fsize', 'Firms', 'Emp']]
d13.to_dict('list')
"""
#%%
"""
year2 = date
fsize = size category
Firms = number of firms in category
firmdeath_firms = number of exits
"""
# Compare the `Emp` column across two years for each firm-size category and
# plot the percent change as a horizontal bar chart.
# (`Emp` is presumably employment -- see the BDS codebook.)
n1 = 2011
n2 = 2013
years = [n1, n2]
# keep only the two comparison years and the three columns the pivot needs
fsz = raw.loc[raw['year2'].isin(years), ['year2', 'fsize', 'Emp']]
# rows = size category, columns = year, cells = Emp.
# The arguments are passed by keyword because recent pandas releases no
# longer accept them positionally in DataFrame.pivot.
fszp = fsz.pivot(index='fsize', columns='year2', values='Emp')
# percent change from year n1 to year n2
fszp['PctChEmp'] = 100*(fszp[n2]/fszp[n1]-1)
fszp['PctChEmp'].plot(kind='barh')
#%%
# =============================================================================
"""
firm ages
"""
# Read the firm-age release of the BDS straight from the Census web server.
# Note that this rebinds `url` and `raw`, replacing the firm-size data.
url = 'http://www2.census.gov/ces/bds/firm/bds_f_age_release.csv'
raw = pd.read_csv(filepath_or_buffer=url)

# Quick look at what arrived: the (rows, columns) shape, then each column's dtype.
dimensions = raw.shape
print('\nDataframe dimensions:', dimensions)
column_types = raw.dtypes
print('\nVariables and dtypes:\n', column_types, sep='')
#print('Firm size categories:\n', fsz['fsize'].head(12), sep='')
#%%
# clean up category labels
# http://pandas.pydata.org/pandas-docs/stable/text.html#splitting-and-replacing-strings
# This section was adapted from the firm-size analysis and originally
# hard-coded the 'fsize' column, which raises KeyError if the firm-age file
# has no column of that name.  Use whichever category column is present.
# NOTE(review): 'fage4' is the firm-age category name expected from the BDS
# codebook -- confirm against the downloaded file.
cat_col = next((col for col in ('fsize', 'fage4') if col in raw.columns), None)
if cat_col is None:
    raise KeyError("no 'fsize' or 'fage4' category column in the data; "
                   "found: " + ', '.join(map(str, raw.columns)))
# Drop the first whitespace-delimited token of each label and keep the rest.
# Caution: this overwrites the column in place, so re-running this cell
# strips one more token from every label.
raw[cat_col] = raw[cat_col].str.split(n=1).str[1]
#print('\nEdited categories:\n', raw[cat_col].head(12), sep='')
#%%
"""
year2 = date
cat_col = category column ('fsize' or 'fage4', whichever the file has)
Firms = number of firms in category
firmdeath_firms = number of exits
"""
# keep 2012 onward, index by (year, category), and scale Firms to millions
fsz = raw[raw['year2'] >= 2012]
fsz = fsz.set_index(['year2', cat_col])
fsz = fsz['Firms']/10**6
# bare expression: echoes the result in an interactive session, no effect
# when the file is run as a plain script
fsz