Skip to content

Latest commit

 

History

History
408 lines (349 loc) · 10.3 KB

NOTES.org

File metadata and controls

408 lines (349 loc) · 10.3 KB

## Issues

wget -O ava_list.html 'https://www.ecfr.gov/cgi-bin/retrieveECFR?gp=&SID=371db32ecca6629af6dccad2a39d7833&mc=true&n=sp27.1.9.c'
pup 'text{}' < ava_list.html > ava_list.txt

Then we need to get a quick

for cfr in `psql -At -c 'select cfr_index from avas order by cfr_index' -d avas`; do \
 desc=`perl -n -e "BEGIN { \\$cfr='$cfr';}" -e '$p++ if (/^§$cfr/);' \
       -e 'if ($p>1) { exit if /Back to Top/; print} ' < ava_list.txt |\
       tr "\n" '|' | sed -e 's/"/\"/g' -e 's/^/"/' -e 's/$/"/'`;
 echo $cfr,$desc;
done  > ava_list.csv

Now that we have the compelete description in there, we can quickly get the boundary values, and the like:

--\COPY ava_desc from desc.csv with csv;
create view ava_desc_text  as
with r as (
 select cfr_index,l,n
 from ava_desc,unnest(string_to_array(d,'|')) with ordinality as d(l,n)
),m as (
 select
 cfr_index,l,
 n as map_row from r
 where l~*'^\s*Approved\s+maps?.?\s*$'
),b as (
 select
 cfr_index,l,
 n as boundary_row
 from r where l~*'^\s*Boundar'
),
v as (
 select
 cfr_index,l as version,
 n as version_row
 from r where l~*'^\s*\[.*\]\s*$'
),
a as (
 select
 cfr_index,name,
 m.map_row,
 boundary_row,
 version_row,
 version
 from avas left join m using (cfr_index)
 left join b using (cfr_index)
 left join v using (cfr_index)
),
f as (select
 cfr_index,name,version,version_row,
 min(map_row) as map_row,
 min(boundary_row) as boundary_row
 from a
 group by 1,2,3,4
)
select * from f;
create or replace view ava_map_bound as
with r as (
 select cfr_index,l,n
 from ava_desc,unnest(string_to_array(d,'|')) with ordinality as d(l,n)
),
m as (
select cfr_index,trim(both ' ' from string_agg(' ',l order by n)) as maps
from ava_desc_text join r using (cfr_index)
where n>map_row and n<boundary_row and length(l)>4
group by cfr_index
),
b as (
select cfr_index,trim(both ' ' from string_agg(' ',l order by n)) as boundary_desc
from ava_desc_text join r using (cfr_index)
where n>boundary_row and n<version_row and length(l)>4
group by cfr_index
)
select cfr_index,version,maps,boundary_desc
from avas a
left join ava_desc_text using (cfr_index)
left join m using (cfr_index)
left join b using (cfr_index)
update avas a
set boundary_description=m.boundary_desc,
cfr_revision_history=version,
approved_maps=m.maps
from ava_map_bound m
where a.cfr_index=m.cfr_index;
UPDATE 239

## Schema

The current schema for the AVAs as postgis is below:

create table avas (
ava_id text primary key,
name text,
aka text,
creation_date date,
removal_date date,
county text,
state text,
within text,
contains text,
petitioner text,
cfr_author text,
cfr_index text,
cfr_revision_history text,
approved_maps text,
boundary_description text,
boundary geometry('MultiPolygon',4269),
used_maps text,
valid_start date,
valid_end date,
lcsh text,
sameAs text
);
CREATE TABLE

## AVA Centroids

I created a CSV file from the latest AVA list, where multiples are seperated by ‘|’.

quinn=# \d ava_list
   Table "public.ava_list"
  Column  | Type | Modifiers
----------+------+-----------
 cfr      | text | not null
 state    | text |
 name     | text |
 county   | text |
 within   | text |
 contains | text |

cfr | state | name | county | within | contains -------+-------+---------------------------------+------------------------+-------------------------+---------- 9.232 | CA | Big Valley District-Lake County | Lake | Clear Lake|North Coast | 9.196 | CA | Borden Ranch | Sacramento|San Joaquin | Lodi | 9.37 | CA | California Shenandoah Valley | Amador|El Dorado | Sierra Foothills | 9.209 | CA | Calistoga | Napa | Napa Valley|North Coast | 9.176 | CA | Capay Valley | Yolo | |

I uploaded the national counties (countyp020), and will just use them to locate the AVAs

create temp table ava_centroid as
with c as (
 select state,st_asewkt(st_centroid(st_union(boundary))) as centroid,
 regexp_replace(county,E' [^ ]+$','') as county
 from county group by state,county
),
s as (
 select state,st_centroid(st_union(centroid)) as centroid
from c group by state
),
a as (
 select name,
regexp_split_to_table(coalesce(county,''),'\|') as county,
regexp_split_to_table(state,'\|') as state from ava_list
),
t as (
 select name,st_centroid(st_union(c.centroid)) as county,
 st_centroid(st_union(s.centroid)) as state
 from a join s using (state)
left join c using (state,county)
group by 1 order by 2
)
select name,coalesce(county,state) as centroid
from t;

Then I created the avas from the centroids and the list, and the original shape for Coombsville.

create table avas as
select name,cfr,a.state,a.county,within,contains,
(coalesce(st_setsrid(geom,4269),centroid)) as boundary
from ava_list a left join napa_shape using (name) left join ava_centroid using (name);

## Template Polygons

The problem with using centroids, is that for users ready to make some modifications, they need to convert the point to a polygon, and that’s a pain in qgis. So instead, lets create triangles as place holders for the polygons.

This query creates

insert into avas (ava_id,name,cfr_index,within,contains,county,state,boundary)
select regexp_replace(lower(name),'[^a-z]','_','g') as ava_id,
name,cfr,within,contains,county,state,
case when (st_geometrytype(boundary) = 'ST_Point')
then
st_multi(
st_transform(st_setsrid(
st_makepolygon(
st_makeline(ARRAY[
 st_makepoint(st_x(st_transform(boundary,3857)),
              st_y(st_transform(boundary,3857))+4000),
 st_makepoint(st_x(st_transform(boundary,3857))-4000*sin(pi()/3),
              st_y(st_transform(boundary,3857))-1000*cos(pi()/3)),
 st_makepoint(st_x(st_transform(boundary,3857))+4000*sin(pi()/3),
              st_y(st_transform(boundary,3857))-1000*cos(pi()/3)),
 st_makepoint(st_x(st_transform(boundary,3857)),
              st_y(st_transform(boundary,3857))+4000)
])),3857),4269))
else
 st_multi(boundary)
end as boundary
from ava_geojson
INSERT 0 239

## GeoJSON files

Now we want to save these files as a proper GeoJSON file. This is a bit tricky, but using the example at: PostgresOnline, it’s pretty tractable

create or replace function geojson (avas)
returns json as $$
with
p as (
 select row_to_json((SELECT l FROM (
  SELECT
   $1.ava_id,$1.name,$1.aka,$1.created,$1.removed,$1.county,$1.state,
   $1.within,$1.contains,$1.petitioner,$1.cfr_author,$1.cfr_index,
   $1.cfr_revision_history,$1.approved_maps,$1.boundary_description,
   $1.used_maps,
   $1.valid_start,$1.valid_end,$1.lcsh,$1.sameAs) As l)
 ) as properties,
 $1.boundary
),
f as (
 select 'Feature' As type,
 ST_AsGeoJSON(boundary)::json As geometry,
 properties
 from p
),
fc as (
 select 'FeatureCollection' as type,
 array_to_json(array_agg(f)) as features
 from f
)
SELECT row_to_json(fc) as geojson
from fc;
$$ language sql immutable;
CREATE FUNCTION
create materialized view avas_as_geojson as
with
p as (
 select row_to_json((SELECT l FROM (
  SELECT
   ava_id,name,aka,created,removed,county,state,
   within,contains,petitioner,cfr_author,cfr_index,
   cfr_revision_history,approved_maps,boundary_description,
   used_maps,valid_start,valid_end,lcsh,sameAs) As l)
 ) as properties,
 boundary
 from avas
),
f as (
 select 'Feature' As type,
 ST_AsGeoJSON(boundary)::json As geometry,
 properties
 from p
),
fc as (
 select 'FeatureCollection' as type,
 array_to_json(array_agg(f)) as features
 from f
)
SELECT row_to_json(fc) as geojson
from fc;
SELECT 1

## Individual files.

Now that we have a geojson maker, we can save all the files individually.

for i in `psql -At -c 'select ava_id from avas' -d avas`; do
 echo $i;
 psql -At -c "select geojson(a) from avas a where ava_id='$i'" -d avas > tbd/$i.geojson;
done

## Github Issues

The source for the definitive list of AVA’s is TTBs. This includes a PDF file that can be mined for a database of all the information.

create or replace function issue(avas)
returns json as $$
with i as (
select
 $1.ava_id as title,ARRAY['AVA'] as labels,
 format(E'# AVA: %s (%s)\n\nname | value\n--- | ---\nava_id | %2$s\ncfr_index | %s\nstate | %s\ncounty | %s\nwithin | %s\ncontains | %s\n\n\n## Approved Maps\n%s\n\n## Boundary\n%s',
  $1.name,$1.ava_id,$1.cfr_index,$1.state,$1.county,
  regexp_replace($1.within,'\|','\|','g'),
  regexp_replace($1.contains,'\|','\|','g'),
  regexp_replace($1.approved_maps,'\(([^)]+)\)',E'\n\\1.','g'),
  regexp_replace($1.boundary_description,'\(',E'\n(','g')
 ) as body
)
select row_to_json(i) from i;
$$ language sql immutable;
CREATE FUNCTION

This is an example of setting an issue in your github repo. The examples below use authentication in your .netrc file.

for i in `psql -At -c 'select ava_id from avas' -d avas`; do
 echo $i;
 psql -At -c "select issue(a) from avas a where ava_id='$i'" -d avas > issue/$i.json;
done
http POST https://api.github.com/repos/UCDavisLibrary/ava/issues \
title=test body="#This is a test\nHope it works"