# GIS, Spatial & Modern Dev Tools > Covering spatial data analysis (GeoPandas, Shapely, Folium), GIS tools (QGIS, Nominatim, Overpass), network analysis (NetworkX, Kumu), data transformation (Polars, DuckDB), and other handy modern tooling. --- ## 1. Shapely: Geometric Operations ```python from shapely.geometry import Point, LineString, Polygon from shapely.ops import unary_union pt = Point(77.59, 12.97) # (lon, lat) line = LineString([(0,0), (1,1), (2,0)]) poly = Polygon([(0,0), (1,0), (1,1), (0,1)]) # Predicates poly.contains(pt) # True if pt inside poly poly.intersects(other) poly.touches(other) # shares boundary only poly.disjoint(other) # no shared space # Measurements (in CRS units!) poly.area # area poly.length # perimeter / line length poly.bounds # (minx, miny, maxx, maxy) poly.centroid # returns Point # Operations a.union(b) # merge a.intersection(b) # overlapping area a.difference(b) # a minus b a.buffer(0.01) # expand by 0.01 deg a.simplify(0.001) # reduce vertices merged = unary_union([p1, p2, p3]) ``` --- ## 2. GeoPandas: Spatial DataFrames ```python import geopandas as gpd # Loading gdf = gpd.read_file("data.geojson") # GeoJSON gdf = gpd.read_file("data.gpkg") # GeoPackage (preferred) # From regular DataFrame gdf = gpd.GeoDataFrame( df, geometry=gpd.points_from_xy(df.lon, df.lat), crs="EPSG:4326" ) # CRS Management: CRITICAL ✅ gdf.crs # check current CRS gdf = gdf.set_crs("EPSG:4326") # assign if missing gdf = gdf.to_crs("EPSG:32644") # reproject to UTM 44N (India metres) # ✅ Always reproject to metre-based CRS before measuring distances! # Spatial Join (which points inside which polygon?) 
joined = gpd.sjoin(points_gdf, polys_gdf, how="left", predicate="within") # Dissolve (aggregate geometries by attribute) dissolved = gdf.dissolve(by="state", aggfunc="sum") # Clip, Buffer clipped = gpd.clip(gdf, mask_gdf) gdf_proj = gdf.to_crs("EPSG:32644") gdf_proj["buffer_1km"] = gdf_proj.geometry.buffer(1000) # Save gdf.to_file("out.geojson", driver="GeoJSON") gdf.to_file("out.gpkg", driver="GPKG") ``` | EPSG | CRS | When to Use | | :--- | :--- | :--- | | `4326` | WGS84 lat/lon | GPS, GeoJSON, raw data | | `3857` | Web Mercator | Tile maps, Leaflet | | `32644` | UTM 44N | Metric measurements in India between 78°E and 84°E (choose the UTM zone covering your data) | --- ## 3. Folium: Interactive Web Maps ```python import folium from folium.plugins import MarkerCluster, HeatMap m = folium.Map(location=[20.59, 78.96], zoom_start=5) folium.TileLayer('cartodbpositron').add_to(m) # clean light theme # Markers folium.Marker([12.97, 77.59], popup="Bengaluru").add_to(m) folium.CircleMarker([28.6, 77.2], radius=10, color="red", fill=True).add_to(m) # Cluster (many points) mc = MarkerCluster().add_to(m) for _, row in gdf.iterrows(): folium.Marker([row.lat, row.lon]).add_to(mc) # Choropleth (coloured polygons); values in columns[0] must match the key_on property folium.Choropleth( geo_data=states_geojson, data=df, columns=["state_code", "value"], key_on="feature.properties.ST_NM", fill_color="YlOrRd", legend_name="Population" ).add_to(m) # Heatmap HeatMap([[r.lat, r.lon, r.val] for _, r in df.iterrows()], radius=15).add_to(m) folium.LayerControl().add_to(m) m.save("map.html") ``` --- ## 4. 
Nominatim: OSM Geocoding ```python from geopy.geocoders import Nominatim from geopy.extra.rate_limiter import RateLimiter import time geolocator = Nominatim(user_agent="my_app_v1") # ✅ must set user_agent # Forward (address -> lat/lon) loc = geolocator.geocode("Connaught Place, New Delhi") print(loc.latitude, loc.longitude) # Reverse (lat/lon -> address) loc = geolocator.reverse("28.63, 77.22") print(loc.address) # Batch with rate limiter (1 req/sec limit) geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1) df['location'] = df['address'].apply(geocode) df['lat'] = df['location'].apply(lambda x: x.latitude if x else None) df['lon'] = df['location'].apply(lambda x: x.longitude if x else None) ``` --- ## 5. Overpass API: OSM Data Extraction ```python import overpy api = overpy.API() # Bounding-box filter order: (south, west, north, east) result = api.query(""" [out:json][timeout:25]; node[amenity=hospital](12.8,77.4,13.1,77.8); out body; """) for node in result.nodes: print(node.tags.get("name"), node.lat, node.lon) # Via raw requests import requests query = """ [out:json]; area["ISO3166-1"="IN"]->.a; (node["amenity"="school"](area.a);); out center; """ data = requests.get("https://overpass-api.de/api/interpreter", params={"data": query}).json() ``` --- ## 6. Kumu: Relationship & Network Visualization ``` Web-based tool for stakeholder maps, influence networks, org charts. URL: https://kumu.io Import JSON format: { "elements": [ {"label": "Alice", "type": "Person"}, {"label": "Project X", "type": "Project"} ], "connections": [ {"from": "Alice", "to": "Project X", "label": "leads"} ] } Excel import: Sheet 1 = elements, Sheet 2 = connections Decorations (Kumu CSS): @settings { element-color: categorize("type", purple, blue); } @elements { size: scale("weight", 10, 30); } Use Cases: Stakeholder maps, causal loop diagrams, system maps ``` --- ## 7. 
NetworkX: Graph & Network Analysis ```python import networkx as nx G = nx.DiGraph() # directed; nx.Graph() for undirected G.add_edges_from([("A","B"), ("B","C")], weight=1.0) # From DataFrame (undirected by default; pass create_using=nx.DiGraph for a directed graph) G = nx.from_pandas_edgelist(df, source="from", target="to", edge_attr="weight") # Centrality Measures nx.degree_centrality(G) # normalized degree (fraction of other nodes each node is connected to) nx.betweenness_centrality(G) # bridge importance nx.pagerank(G) # PageRank scores # Paths nx.shortest_path(G, "A", "D") nx.diameter(G) # longest shortest path (graph must be connected; strongly connected if directed) nx.is_connected(G.to_undirected()) # Communities (Louvain) from networkx.algorithms.community import louvain_communities communities = louvain_communities(G) # Plot import matplotlib.pyplot as plt pos = nx.spring_layout(G, seed=42) nx.draw(G, pos, with_labels=True, node_color="skyblue", arrows=True) plt.show() ``` --- ## 8. OSMnx: Street Networks ```python import osmnx as ox import networkx as nx # Download city road network G = ox.graph_from_place("Bengaluru, India", network_type="drive") # Nearest node by coordinates orig = ox.nearest_nodes(G, X=77.59, Y=12.97) # (lon, lat) order dest = ox.nearest_nodes(G, X=77.65, Y=12.98) # Shortest path path = nx.shortest_path(G, orig, dest, weight="length") # Stats ox.basic_stats(G) # edges, nodes, avg street length ``` --- ## 9. Polars: Fast DataFrames (Rust-backed) ```python import polars as pl df = pl.read_csv("data.csv") df = pl.read_parquet("data.parquet") # Lazy API (optimized execution) result = ( pl.scan_csv("large.csv") .filter(pl.col("age") > 25) .group_by("city") .agg(pl.col("salary").mean().alias("avg_salary")) .sort("avg_salary", descending=True) .collect() ) # Common ops df.select(["col1", "col2"]) df.with_columns(pl.col("price") * 1.18) df.join(other, on="id", how="left") df.filter(pl.col("status") == "active") ``` --- ## 10. DuckDB: SQL on Files ```python import duckdb # Query CSV/Parquet directly (no loading!) 
result = duckdb.sql("SELECT city, COUNT(*) FROM 'data.csv' GROUP BY city").df() # Spatial extension duckdb.sql("INSTALL spatial; LOAD spatial;") duckdb.sql(""" SELECT ST_Distance( ST_Point(77.59, 12.97), ST_Point(72.88, 19.07) ) AS dist_degrees """) # Works with in-memory pandas DFs directly result = duckdb.sql("SELECT * FROM df WHERE age > 30").fetchdf() ``` --- ## 11. Kepler.gl: Large-Scale Map Visualization ```python from keplergl import KeplerGl m = KeplerGl(height=500) m.add_data(data=gdf, name="Spatial Data") # GeoDataFrame or dict m # renders in Jupyter m.save_to_html(file_name="kepler_map.html", config=m.config) ``` --- ## 12. Quick Reference | Task | Tool | | :--- | :--- | | Geometry ops (buffer, intersect) | Shapely | | Spatial DataFrames | GeoPandas | | Interactive web maps | Folium | | Large-scale map viz | Kepler.gl | | Address → coordinates | Nominatim (geopy) | | OSM feature extraction | Overpass API (overpy) | | Desktop GIS | QGIS | | Relationship/network viz | Kumu | | Graph analysis | NetworkX | | Street routing | OSMnx | | Fast DataFrames | Polars | | SQL on files | DuckDB | | Columnar storage | Parquet (PyArrow) |