## Issues
wget -O ava_list.html 'https://www.ecfr.gov/cgi-bin/retrieveECFR?gp=&SID=371db32ecca6629af6dccad2a39d7833&mc=true&n=sp27.1.9.c'
pup 'text{}' < ava_list.html > ava_list.txt
Then we need to get a quick
for cfr in `psql -At -c 'select cfr_index from avas order by cfr_index' -d avas`; do \
desc=`perl -n -e "BEGIN { \\$cfr='$cfr';}" -e '$p++ if (/^§$cfr/);' \
-e 'if ($p>1) { exit if /Back to Top/; print} ' < ava_list.txt |\
tr "\n" '|' | sed -e 's/"/\"/g' -e 's/^/"/' -e 's/$/"/'`;
echo $cfr,$desc;
done > ava_list.csv
Now that we have the compelete description in there, we can quickly get the boundary values, and the like:
--\COPY ava_desc from desc.csv with csv;
create view ava_desc_text as
with r as (
select cfr_index,l,n
from ava_desc,unnest(string_to_array(d,'|')) with ordinality as d(l,n)
),m as (
select
cfr_index,l,
n as map_row from r
where l~*'^\s*Approved\s+maps?.?\s*$'
),b as (
select
cfr_index,l,
n as boundary_row
from r where l~*'^\s*Boundar'
),
v as (
select
cfr_index,l as version,
n as version_row
from r where l~*'^\s*\[.*\]\s*$'
),
a as (
select
cfr_index,name,
m.map_row,
boundary_row,
version_row,
version
from avas left join m using (cfr_index)
left join b using (cfr_index)
left join v using (cfr_index)
),
f as (select
cfr_index,name,version,version_row,
min(map_row) as map_row,
min(boundary_row) as boundary_row
from a
group by 1,2,3,4
)
select * from f;
create or replace view ava_map_bound as
with r as (
select cfr_index,l,n
from ava_desc,unnest(string_to_array(d,'|')) with ordinality as d(l,n)
),
m as (
select cfr_index,trim(both ' ' from string_agg(' ',l order by n)) as maps
from ava_desc_text join r using (cfr_index)
where n>map_row and n<boundary_row and length(l)>4
group by cfr_index
),
b as (
select cfr_index,trim(both ' ' from string_agg(' ',l order by n)) as boundary_desc
from ava_desc_text join r using (cfr_index)
where n>boundary_row and n<version_row and length(l)>4
group by cfr_index
)
select cfr_index,version,maps,boundary_desc
from avas a
left join ava_desc_text using (cfr_index)
left join m using (cfr_index)
left join b using (cfr_index)
update avas a
set boundary_description=m.boundary_desc,
cfr_revision_history=version,
approved_maps=m.maps
from ava_map_bound m
where a.cfr_index=m.cfr_index;
UPDATE 239 |
---|
## Schema
The current schema for the AVAs as postgis is below:
create table avas (
ava_id text primary key,
name text,
aka text,
creation_date date,
removal_date date,
county text,
state text,
within text,
contains text,
petitioner text,
cfr_author text,
cfr_index text,
cfr_revision_history text,
approved_maps text,
boundary_description text,
boundary geometry('MultiPolygon',4269),
used_maps text,
valid_start date,
valid_end date,
lcsh text,
sameAs text
);
CREATE TABLE |
---|
## AVA Centroids
I created a CSV file from the latest AVA list, where multiples are seperated by ‘|’.
quinn=# \d ava_list Table "public.ava_list" Column | Type | Modifiers ----------+------+----------- cfr | text | not null state | text | name | text | county | text | within | text | contains | text |
cfr | state | name | county | within | contains -------+-------+---------------------------------+------------------------+-------------------------+---------- 9.232 | CA | Big Valley District-Lake County | Lake | Clear Lake|North Coast | 9.196 | CA | Borden Ranch | Sacramento|San Joaquin | Lodi | 9.37 | CA | California Shenandoah Valley | Amador|El Dorado | Sierra Foothills | 9.209 | CA | Calistoga | Napa | Napa Valley|North Coast | 9.176 | CA | Capay Valley | Yolo | |
I uploaded the national counties (countyp020), and will just use them to locate the AVAs
create temp table ava_centroid as with c as ( select state,st_asewkt(st_centroid(st_union(boundary))) as centroid, regexp_replace(county,E' [^ ]+$','') as county from county group by state,county ), s as ( select state,st_centroid(st_union(centroid)) as centroid from c group by state ), a as ( select name, regexp_split_to_table(coalesce(county,''),'\|') as county, regexp_split_to_table(state,'\|') as state from ava_list ), t as ( select name,st_centroid(st_union(c.centroid)) as county, st_centroid(st_union(s.centroid)) as state from a join s using (state) left join c using (state,county) group by 1 order by 2 ) select name,coalesce(county,state) as centroid from t;
Then I created the avas from the centroids and the list, and the original shape for Coombsville.
create table avas as
select name,cfr,a.state,a.county,within,contains,
(coalesce(st_setsrid(geom,4269),centroid)) as boundary
from ava_list a left join napa_shape using (name) left join ava_centroid using (name);
## Template Polygons
The problem with using centroids, is that for users ready to make some modifications, they need to convert the point to a polygon, and that’s a pain in qgis. So instead, lets create triangles as place holders for the polygons.
This query creates
insert into avas (ava_id,name,cfr_index,within,contains,county,state,boundary)
select regexp_replace(lower(name),'[^a-z]','_','g') as ava_id,
name,cfr,within,contains,county,state,
case when (st_geometrytype(boundary) = 'ST_Point')
then
st_multi(
st_transform(st_setsrid(
st_makepolygon(
st_makeline(ARRAY[
st_makepoint(st_x(st_transform(boundary,3857)),
st_y(st_transform(boundary,3857))+4000),
st_makepoint(st_x(st_transform(boundary,3857))-4000*sin(pi()/3),
st_y(st_transform(boundary,3857))-1000*cos(pi()/3)),
st_makepoint(st_x(st_transform(boundary,3857))+4000*sin(pi()/3),
st_y(st_transform(boundary,3857))-1000*cos(pi()/3)),
st_makepoint(st_x(st_transform(boundary,3857)),
st_y(st_transform(boundary,3857))+4000)
])),3857),4269))
else
st_multi(boundary)
end as boundary
from ava_geojson
INSERT 0 239 |
---|
## GeoJSON files
Now we want to save these files as a proper GeoJSON file. This is a bit tricky, but using the example at: PostgresOnline, it’s pretty tractable
create or replace function geojson (avas)
returns json as $$
with
p as (
select row_to_json((SELECT l FROM (
SELECT
$1.ava_id,$1.name,$1.aka,$1.created,$1.removed,$1.county,$1.state,
$1.within,$1.contains,$1.petitioner,$1.cfr_author,$1.cfr_index,
$1.cfr_revision_history,$1.approved_maps,$1.boundary_description,
$1.used_maps,
$1.valid_start,$1.valid_end,$1.lcsh,$1.sameAs) As l)
) as properties,
$1.boundary
),
f as (
select 'Feature' As type,
ST_AsGeoJSON(boundary)::json As geometry,
properties
from p
),
fc as (
select 'FeatureCollection' as type,
array_to_json(array_agg(f)) as features
from f
)
SELECT row_to_json(fc) as geojson
from fc;
$$ language sql immutable;
CREATE FUNCTION |
---|
create materialized view avas_as_geojson as
with
p as (
select row_to_json((SELECT l FROM (
SELECT
ava_id,name,aka,created,removed,county,state,
within,contains,petitioner,cfr_author,cfr_index,
cfr_revision_history,approved_maps,boundary_description,
used_maps,valid_start,valid_end,lcsh,sameAs) As l)
) as properties,
boundary
from avas
),
f as (
select 'Feature' As type,
ST_AsGeoJSON(boundary)::json As geometry,
properties
from p
),
fc as (
select 'FeatureCollection' as type,
array_to_json(array_agg(f)) as features
from f
)
SELECT row_to_json(fc) as geojson
from fc;
SELECT 1 |
---|
## Individual files.
Now that we have a geojson maker, we can save all the files individually.
for i in `psql -At -c 'select ava_id from avas' -d avas`; do
echo $i;
psql -At -c "select geojson(a) from avas a where ava_id='$i'" -d avas > tbd/$i.geojson;
done
## Github Issues
The source for the definitive list of AVA’s is TTBs. This includes a PDF file that can be mined for a database of all the information.
create or replace function issue(avas)
returns json as $$
with i as (
select
$1.ava_id as title,ARRAY['AVA'] as labels,
format(E'# AVA: %s (%s)\n\nname | value\n--- | ---\nava_id | %2$s\ncfr_index | %s\nstate | %s\ncounty | %s\nwithin | %s\ncontains | %s\n\n\n## Approved Maps\n%s\n\n## Boundary\n%s',
$1.name,$1.ava_id,$1.cfr_index,$1.state,$1.county,
regexp_replace($1.within,'\|','\|','g'),
regexp_replace($1.contains,'\|','\|','g'),
regexp_replace($1.approved_maps,'\(([^)]+)\)',E'\n\\1.','g'),
regexp_replace($1.boundary_description,'\(',E'\n(','g')
) as body
)
select row_to_json(i) from i;
$$ language sql immutable;
CREATE FUNCTION |
---|
This is an example of setting an issue in your github repo. The examples below use authentication in your .netrc file.
for i in `psql -At -c 'select ava_id from avas' -d avas`; do
echo $i;
psql -At -c "select issue(a) from avas a where ava_id='$i'" -d avas > issue/$i.json;
done
http POST https://api.github.com/repos/UCDavisLibrary/ava/issues \
title=test body="#This is a test\nHope it works"