Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- PostgreSQL
- -- combine data for all years into one table
- -- to find distinct addresses & BBL numbers
- create table dhcr_all (
- zip integer,
- bldgno1 text,
- street_name1 text,
- street_suffix1 text,
- bldgno2 text,
- street_name2 text,
- street_suffix2 text,
- bldgno3 text,
- street_name3 text,
- street_suffix3 text,
- status1 text,
- status2 text,
- status3 text,
- block text,
- lot integer,
- boro_code integer
- );
- insert into dhcr_all (
- zip,
- bldgno1,
- street_name1,
- street_suffix1,
- bldgno2,
- street_name2,
- street_suffix2,
- bldgno3,
- street_name3,
- street_suffix3,
- status1,
- status2,
- status3,
- block,
- lot,
- boro_code
- )
- SELECT * FROM dhcr2009tmp;
- insert into dhcr_all (
- zip,
- bldgno1,
- street_name1,
- street_suffix1,
- bldgno2,
- street_name2,
- street_suffix2,
- bldgno3,
- street_name3,
- street_suffix3,
- status1,
- status2,
- status3,
- block,
- lot,
- boro_code
- )
- SELECT * FROM dhcr2011;
- insert into dhcr_all (
- zip,
- bldgno1,
- street_name1,
- street_suffix1,
- bldgno2,
- street_name2,
- street_suffix2,
- bldgno3,
- street_name3,
- street_suffix3,
- status1,
- status2,
- status3,
- block,
- lot,
- boro_code
- )
- SELECT * FROM dhcr2012;
- -- 2013 doesn't have a bldg03, street3, stsufx3 columns
- insert into dhcr_all (
- zip,
- bldgno1,
- street_name1,
- street_suffix1,
- bldgno2,
- street_name2,
- street_suffix2,
- status1,
- status2,
- status3,
- block,
- lot,
- boro_code
- )
- SELECT
- "ZIP",
- "BLDGNO1",
- "STREET1",
- "STSUFX1",
- "BLDGNO2",
- "STREET2",
- "STSUFX2",
- "STATUS1",
- "STATUS2",
- "STATUS3",
- "BLOCK",
- "LOT",
- "BORO_CODE"
- FROM dhcr2013;
- -- should return about 170,000 rows
- select count(*) from dhcr_all;
- -- split bldgno1 column into two separate numbers for geocoding with NYC Geoclient API
- alter table dhcr_all add column bldgno1a text;
- alter table dhcr_all add column bldgno1b text;
- update dhcr_all set bldgno1a = split_part(bldgno1, 'TO', 1);
- update dhcr_all set bldgno1b = split_part(bldgno1, 'TO', 2;
- -- find all distinct addresses
- select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
- from dhcr_all
- group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip;
- -- returns a measley 4,755 rows, DHCR's data is obviously not complete!
- select count(*) from (
- select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
- from dhcr_all
- group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
- ) as distinct_addresses;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement