# Source code for brix.functions

# Other functions that use brix objects (e.g. function to get street network for table from OSM)

from .classes import Handler
from .classes import Indicator
from .classes import CompositeIndicator
from .classes import GEOGRIDDATA
from .classes import StaticHeatmap
from .helpers import get_buffer_size, has_tags, urljoin

# osmnx is an optional dependency: fall back to None so functions that need it
# can raise a clear error at call time instead of at import time.
# BUGFIX: was a bare `except:`, which also swallows SystemExit/KeyboardInterrupt.
try:
	from osmnx import geometries_from_polygon
except Exception:
	geometries_from_polygon = None
import requests
import pandas as pd
import geopandas as gpd
import numpy as np
import re
from geopandas.tools import sjoin
from shapely.geometry import Point

def normalize_table_name(table_name):
	'''
	Normalizes a table name: lowercases it, strips characters that are not
	letters, spaces, or underscores, collapses runs of spaces, and replaces
	spaces with underscores.
	'''
	cleaned = re.sub('[^a-zA-Z _]+', '', table_name.lower())
	collapsed = re.sub('[ ]+', ' ', cleaned)
	return collapsed.replace(' ', '_')

def check_table_name(table_name):
	'''
	Returns True when the given table name is already in normalized form
	(i.e. :func:`normalize_table_name` would leave it unchanged), False otherwise.
	'''
	return normalize_table_name(table_name) == table_name

def is_table(table_name):
	'''
	Checks if a table exists.

	Parameters
	----------
	table_name : str
		Name of table to check.

	Returns
	-------
	is_table : boolean
		True if table exists.
	'''
	return table_name in list_tables()
def list_tables():
	'''
	Returns a list of table names for all tables.

	Returns
	-------
	table_list: list
		List of table names (strings).

	Raises
	------
	NameError
		If the remote endpoint does not respond with status 200.
	'''
	table_list_url = urljoin(Handler.remote_host, 'api/tables/list')
	response = requests.get(table_list_url)
	if response.status_code != 200:
		raise NameError(f'Unable to retrieve list of tables: status code ={response.status_code}')
	# Each entry is a URL; the table name is its last path component.
	return [entry.strip('/').split('/')[-1] for entry in response.json()]
def OSM_infer_geogrid_data(H,amenity_tag_categories=None):
	'''
	Infers the cell type based on the OSM tags classified into categories in amenity_tag_categories.
	This function does not update the color of the cell, as :func:`brix.Handler.post_geogrid_data` will eventually take care of this.

	Parameters
	----------
	H: :class:`brix.Handler`
		Handler for the table to infer types for.
	amenity_tag_categories: dict
		Dictionary with categories of amenities. For example:
			{
				"restaurants": {
					"amenity":["restaurant","cafe","fast_food","pub","cafe"],
					"shop":["coffee"]
				},
				"nightlife": {
					"amenity":["bar","pub","biergarten","nightclub"]
				}
			}
		Will add two new columns: "category_restaurants" and "category_nightlife"

	Returns
	-------
	geogrid_data: list
		List of cells to be updated.
	'''
	if amenity_tag_categories is None:
		raise NameError('amenity_tag_categories is required')
	node_data_df = get_OSM_nodes(H,amenity_tag_categories=amenity_tag_categories)
	# Assign each OSM node its matching category (later categories overwrite earlier ones),
	# then keep only categorized nodes.
	node_data_df['category'] = None
	for cat in amenity_tag_categories:
		node_data_df.loc[node_data_df[f'category_{cat}'],'category'] = cat
	node_data_df = node_data_df[~node_data_df['category'].isna()]
	geogrid_df = H.get_geogrid_data(include_geometries=True,as_df=True)
	geogrid_df = gpd.GeoDataFrame(geogrid_df[['id']],geometry=geogrid_df['geometry'])
	# Spatially match nodes to grid cells; count nodes per (cell, category).
	matched = sjoin(node_data_df, geogrid_df, how='inner')
	# BUGFIX: the final groupby previously used ['id_right','category'] — the same keys as the
	# count — so the descending sort was discarded and an arbitrary category won per cell.
	# Grouping by 'id_right' alone takes the most frequent category for each cell.
	matched = matched[['category','id_right','id_left']].groupby(['id_right','category']).count().reset_index().sort_values(by='id_left',ascending=False).groupby('id_right').first()
	id_category = dict(matched.reset_index()[['id_right','category']].values)
	geogrid_data = H.get_geogrid_data()
	for cell in geogrid_data:
		if cell['id'] in id_category.keys():
			cell['name'] = id_category[cell['id']]
	return geogrid_data
def add_height(H, levels):
	'''
	Adds levels to all the cells in geogrid. Function mainly used for testing as an example.

	Parameters
	----------
	H: :class:`brix.Handler`
		Handler connected to the necessary table.
	levels: float
		Number of levels by which to raise each cell's height.

	Returns
	-------
	new_geogrid_data: dict
		Same as input, but with additional levels in each cell.
	'''
	updated_cells = H.get_geogrid_data()
	for grid_cell in updated_cells:
		grid_cell['height'] = grid_cell['height'] + levels
	return updated_cells
def make_numeric_indicator(name,return_indicator,viz_type='bar',requires_geometry=False,requires_geogrid_props=False):
	'''
	Function that constructs an indicator based on a user defined return_indicator function.

	Parameters
	----------
	name: str
		Name of the indicator.
	return_indicator: func
		Function that takes in geogrid_data and returns the value of the indicator.
	viz_type: str, defaults to 'bar'
		Visualization type in front end. Used for numeric indicators.
	requires_geometry: boolean, defaults to `False`
		If `True`, the geogrid_data object will also come with geometries.
	requires_geogrid_props: boolean, defaults to `False`
		If `True`, the geogrid_data object will include properties.

	Returns
	-------
	I: :class:`brix.Indicator`
		Numeric indicator that returns the value of the given function.
	'''
	indicator = Indicator(
		name=name,
		requires_geometry=requires_geometry,
		requires_geogrid_props=requires_geogrid_props,
		viz_type=viz_type,
	)
	indicator.set_return_indicator(return_indicator)
	return indicator
def make_static_heatmap_indicator(shapefile,columns=None,name=None):
	'''
	Function that constructs a heatmap indicator that only visualizes the given shapefile.
	This function wraps :class:`brix.StaticHeatmap` to make it easier for users to find.

	Parameters
	----------
	shapefile: geopandas.GeoDataFrame or str
		Shapefile with values for each point, or path to shapefile.
	columns: list
		Columns to plot. If not provided, it will return all numeric columns.
		The name of the indicator will be given by the name of the column.
	name: str, optional
		Name of the indicator.
		If not provided, it will generate a name by hashing the column names.

	Returns
	-------
	Heatmap: brix.Indicator
		Heatmap indicator that posts the given shapefile to the table.
	'''
	return StaticHeatmap(shapefile, columns=columns, name=name)
def get_OSM_geometries(H,tags = {'building':True},buffer_percent=0.25,use_stored=True,only_polygons=True):
	'''
	Gets the buildings from OSM within the table's geogrid.
	This function requires the osmnx package to be installed.
	Simple usage: `buildings = OSM_geometries(H)`.

	Parameters
	----------
	H: :class:`brix.Handler`
		Table Handler.
	tags: dict, defaults to building
		Tags of geometries to get. See: `osmnx.geometries_from_polygon`
	buffer_percent: float, defaults to 0.25
		Buffer to use around the table.
		Size of buffer in units of the grid diameter, see :func:`brix.get_buffer_size`.
	use_stored: boolean, defaults to True
		If True, results are fetched once and cached in :attr:`brix.Handler.OSM_data`.
		If False, results are fetched on every call.
	only_polygons: boolean, defaults to True
		If False, it will return all buildings, including those without their polygon
		shape (e.g. some buildings just have a point).

	Returns
	-------
	buildings: geopandas.GeoDataFrame
		Table with geometries from OSM.
	'''
	if 'OSM_geometries' not in H.OSM_data.keys():
		H.OSM_data['OSM_geometries'] = None
	if use_stored and (H.OSM_data['OSM_geometries'] is not None):
		print('Using stored OSM geometries')
		buildings = H.OSM_data['OSM_geometries'].copy()
	else:
		limit = H.grid_bounds(buffer_percent=buffer_percent)
		if geometries_from_polygon is None:
			raise NameError('Package osmnx not found.')
		buildings = geometries_from_polygon(limit,tags)
		H.OSM_data['OSM_geometries'] = buildings.copy()
	if only_polygons:
		buildings = buildings[buildings.geometry.type=='Polygon']
	return buildings
def get_OSM_nodes(H,expand_tags=False,amenity_tag_categories=None,use_stored=True,buffer_percent=0.25,quietly=True):
	'''
	Returns the nodes from OSM.
	This function can be used to obtain a list of amenities within the area defined by the table.
	There is a default buffer added around the grid, but you can increase this by changing `buffer_percent`.

	Parameters
	----------
	H: :class:`brix.Handler`
		Table Handler.
	expand_tags: boolean, defaults to False.
		If True, it will expand all the tags into a wide format with one column per tag.
		Columns will be named as: tag_{tag}
	amenity_tag_categories: dict (optional)
		Dictionary with categories of amenities. For example:
			{
				"restaurants": {
					"amenity":["restaurant","cafe","fast_food","pub","cafe"],
					"shop":["coffee"]
				},
				"nightlife": {
					"amenity":["bar","pub","biergarten","nightclub"]
				}
			}
		Will add two new columns: "category_restaurants" and "category_nightlife"
	use_stored: boolean, defaults to True
		If True, results are fetched once and cached in :attr:`brix.Handler.OSM_data`.
		If False, results are fetched on every call.
	buffer_percent: float, defaults to 0.25
		Buffer to use around the table.
		Size of buffer in units of the grid diameter, see `get_buffer_size`.
	quietly: boolean, defaults to True
		If False, it will print the generated URL.

	Returns
	-------
	node_data_df: geopandas.GeoDataFrame
		Table with all the nodes within the bounds.
	'''
	if 'OSM_nodes' not in H.OSM_data.keys():
		H.OSM_data['OSM_nodes'] = None
	if use_stored and (H.OSM_data['OSM_nodes'] is not None):
		print('Using stored OSM data')
		node_data_df = H.OSM_data['OSM_nodes'].copy()
	else:
		# Query the Overpass API for all nodes inside the (buffered) grid bounding box.
		bbox = H.grid_bounds(bbox=True,buffer_percent=buffer_percent)
		OSM_NODES_URL_ROOT='https://lz4.overpass-api.de/api/interpreter?data=[out:json][bbox];node;out;&bbox='
		str_bounds = ','.join(str(coord) for coord in bbox[:4])
		osm_node_url_bbox = OSM_NODES_URL_ROOT+str_bounds
		if not quietly:
			print(osm_node_url_bbox)
		response = requests.get(osm_node_url_bbox)
		node_data_df = pd.DataFrame(response.json()['elements'])
		H.OSM_data['OSM_nodes'] = node_data_df.copy()
	if amenity_tag_categories is not None:
		# One boolean column per category, flagging nodes whose tags match it.
		for cat_name in amenity_tag_categories:
			cat_tags = amenity_tag_categories[cat_name]
			node_data_df[f'category_{cat_name}'] = node_data_df.apply(lambda row: has_tags(row['tags'], cat_tags), axis=1)
	if expand_tags:
		# Widen the nested tags dict into one tag_{key} column per tag.
		expanded_rows = []
		for node_id,node_tags in node_data_df[~node_data_df['tags'].isna()][['id','tags']].values:
			row = {f'tag_{k}':node_tags[k] for k in node_tags}
			row['id'] = node_id
			expanded_rows.append(row)
		node_data_df = pd.merge(node_data_df,pd.DataFrame(expanded_rows),how='left')
	node_data_df = gpd.GeoDataFrame(node_data_df,geometry=gpd.points_from_xy(node_data_df['lon'],node_data_df['lat']))
	return node_data_df
def griddify(geogrid_data,shapefile,extend_grid=True,buffer_percent=1.3,columns=None,local_crs=None):
	'''
	From a shapefile with polygons and properties, it creates a shapefile with points and the
	properties of the polygons they fall in. Points are taken from the given GEOGRID and the
	grid is extended to incorporate a buffer. Points are in the center of the grid.

	Parameters
	----------
	geogrid_data: brix.GEOGRIDDATA
	shapefile: geopandas.GeoDataFrame
		Shapefile in WGS84 (default) or in local_crs (if local_crs is provided).
	extend_grid: boolean, defaults to `True`
		If False, it will only return the values for the centroids of the grid.
	buffer_percent: float, defaults to 1.3
		Buffer to extend the grid by (in units of grid diameter).
	columns: list, defaults to all numeric
		Columns to select besides geometry. If not provided, it will default to all numeric columns.
	local_crs: str, defaults to wgs84
		ESRI code for local CRS, must match crs of shapefile.
		Recommended: calculating the centroids of each cell will be more precise if this is provided.

	Returns
	-------
	joined: geopandas.GeoDataFrame
		Shapefile of points and their values.
	'''
	if columns is None:
		# BUGFIX: drop('geometry',1) used the positional axis argument, removed in pandas 2.0.
		columns = shapefile.drop(columns='geometry').select_dtypes(include=[np.number]).columns.tolist()
	geogrid_data_df = geogrid_data.as_df(include_geometries=True)
	if local_crs is not None:
		geogrid_data_df = geogrid_data_df.to_crs(local_crs)
	geogrid_data_df.geometry = geogrid_data_df.geometry.centroid
	if extend_grid:
		limit = geogrid_data.bounds(buffer_percent=buffer_percent)
		selected_shapefile = shapefile[shapefile.geometry.within(limit)]
		geogrid_data_df['lat'] = geogrid_data_df.geometry.y
		geogrid_data_df['lon'] = geogrid_data_df.geometry.x
		# Median spacing between neighboring centroids, used as the synthetic grid step.
		lon_dx = geogrid_data_df.sort_values(by='lat')['lon'].diff().abs().median()
		lat_dx = geogrid_data_df.sort_values(by='lon')['lat'].diff().abs().median()
		s_lon_min,s_lat_min,s_lon_max,s_lat_max = selected_shapefile.total_bounds
		lat_min = geogrid_data_df['lat'].min()
		lat_max = geogrid_data_df['lat'].max()
		lon_min = geogrid_data_df['lon'].min()
		lon_max = geogrid_data_df['lon'].max()
		# Extend the lattice of centroids outward until it covers the shapefile bounds.
		all_lats = np.arange(lat_min,s_lat_min,-1*lat_dx).tolist()[::-1][:-1]+np.arange(lat_min,lat_max,lat_dx).tolist()+np.arange(lat_max,s_lat_max,lat_dx).tolist()[1:]
		all_lons = np.arange(lon_min,s_lon_min,-1*lon_dx).tolist()[::-1][:-1]+np.arange(lon_min,lon_max,lon_dx).tolist()+np.arange(lon_max,s_lon_max,lon_dx).tolist()[1:]
		extended_grid = [Point(lon,lat) for lat in all_lats for lon in all_lons]
		extended_grid = gpd.GeoDataFrame([],geometry=extended_grid,crs='EPSG:4326').reset_index().rename(columns={'index':'extended_id'})
		# Link synthetic points back to the original grid cells where they overlap.
		grid_match = sjoin(geogrid_data.as_df(include_geometries=True),extended_grid)
		extended_grid = pd.merge(extended_grid,grid_match[['id','extended_id']].rename(columns={'id':'grid_id'}),how='left')
		selected_grid = extended_grid
		grid_ids = ['extended_id','grid_id']
	else:
		limit = geogrid_data.bounds()
		selected_shapefile = shapefile[shapefile.geometry.within(limit)]
		selected_grid = geogrid_data_df.rename(columns={'id':'grid_id'})
		grid_ids = ['grid_id']
	joined = sjoin(selected_shapefile,selected_grid,how='left')
	joined = pd.merge(joined[grid_ids+columns],selected_grid[grid_ids+['geometry']],how='outer')
	for c in columns:
		joined[c] = joined[c].astype(float)
	joined = joined[~joined['geometry'].isna()]
	joined = joined[~joined[columns].isnull().all(axis=1)] # Drops rows with all null values
	# BUGFIX: same positional-axis drop as above, replaced with the keyword form.
	joined = gpd.GeoDataFrame(joined.drop(columns='geometry'),geometry=joined['geometry'])
	return joined