Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook 4 - Exploring Area with Shapely\n",
"\n",
"In the previous notebook, we moved towards creating a script to quantize the traffic timeseries by timesteps. We wanted to map `(lon, lat)` coordinates to `regionID` integers, but we found some bugs using MatPlotLib's `Path` features.\n",
"\n",
"Let's copy the work we have so far, and then start using Shapely.\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from math import floor, ceil\n",
"\n",
"# The minimum and maximum unix timestamps found in the code.\n",
"start_time = 1404360000\n",
"end_time = 1405828798\n",
"\n",
"# We'll use this function to index the matrix by timestep...\n",
"def time_to_index(tt, st = 1404360000, divisor=60):\n",
" return floor((tt-st)/divisor)\n",
"\n",
"assert time_to_index(start_time) == 0\n",
"assert time_to_index(start_time + 59) == 0\n",
"assert time_to_index(start_time + 60) == 1\n",
"assert time_to_index(start_time + 61) == 1\n",
"\n",
"# ... and NN is the number of cells we need.\n",
"NN = ceil((end_time - start_time)/60)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# This code is used to get our polylines/paths\n",
"def process_line(line):\n",
" # Not sure what these values are\n",
" val1 = line[0]\n",
" val2 = line[1]\n",
" polyline = [float(v) for v in line[2:]]\n",
" polyline = np.array(list(zip(polyline[0::2], polyline[1::2])))\n",
" return val1, val2, polyline \n",
"\n",
"with open(\"./DataForUConn/DecideRegion/area.txt\") as f:\n",
" area = [process_line(line.split(\",\")) for line in f.read().strip().split(\"\\n\")]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Now let's import shapely and play with it!\n",
"# https://stackoverflow.com/questions/36399381/\n",
"from shapely.geometry import Point\n",
"from shapely.geometry.polygon import Polygon\n",
"\n",
"polygons = [Polygon(list_of_points[2][1:]) for list_of_points in area]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"114.308986281912 22.753792684012 0.0431524431759982 0.06060088137599706\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,45.5681862494)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0012120176275199413\" opacity=\"0.6\" d=\"M 114.326017028,22.8102859632 L 114.32707704,22.8108076408 L 114.328720957,22.8105261008 L 114.329188081,22.8111802664 L 114.329682155,22.8114121219 L 114.33006843,22.8110229357 L 114.332934056,22.8111223025 L 114.333535927,22.8113707191 L 114.33371559,22.8116108548 L 114.333697624,22.8121490883 L 114.334038984,22.811884112 L 114.334209664,22.8115114883 L 114.334667804,22.8114949273 L 114.335009164,22.8113375969 L 114.336114092,22.8103356469 L 114.337120205,22.8102197183 L 114.338234116,22.8097394417 L 114.338467678,22.8097477223 L 114.33955464,22.8104515754 L 114.341360253,22.8104267335 L 114.342528063,22.8090024622 L 114.342941288,22.8081826714 L 114.343471294,22.8080418987 L 114.344971481,22.8083979705 L 114.34616624,22.8082323558 L 114.346489634,22.8080253372 L 114.346732179,22.8073049101 L 114.346399802,22.8057729546 L 114.346184206,22.8055245278 L 114.344836733,22.8050442347 L 114.344243845,22.8043320727 L 114.344558256,22.8030485156 L 114.344270795,22.8018726011 L 114.344755885,22.8013922951 L 114.344755885,22.8011687038 L 114.344297744,22.8010444862 L 114.343588075,22.8012101096 L 114.343336547,22.8011687038 L 114.343237732,22.8009037062 L 114.343974351,22.7992226158 L 114.344944531,22.798758863 L 114.345205043,22.7967299262 L 114.344728936,22.7960260023 L 114.344719952,22.7956781798 L 114.344791818,22.7948831534 L 114.345052329,22.7943448517 L 114.345043346,22.7937651398 L 114.344764868,22.7934090299 L 114.344180963,22.7931274539 L 114.344010283,22.7926802438 L 114.344154014,22.7921999053 L 114.344648087,22.7916450295 L 114.344064182,22.7913054776 L 114.340758382,22.7900383615 L 114.340192443,22.7898561609 L 114.33969837,22.789905852 L 114.339177347,22.7893840944 L 114.337524447,22.7885807494 L 114.337686144,22.7882991635 L 114.338135301,22.7882908815 L 114.338270049,22.7881003966 L 114.338225133,22.7874958121 L 114.337596312,22.7874212741 L 114.33670698,22.7865682253 L 114.336266806,22.7864274303 L 114.336015277,22.7861872503 L 114.335907479,22.7856654785 L 114.335925446,22.7854501435 L 114.336832744,22.785417015 L 114.337057323,22.7851437047 L 114.337587329,22.7831973879 L 114.337470548,22.7821786663 L 114.337784958,22.7816817262 L 114.337434615,22.7810853957 L 114.337551396,22.7804062384 L 114.338207167,22.7809031831 L 114.339626505,22.7797353601 L 114.340973978,22.7804476505 L 114.341171607,22.7792218461 L 114.343040103,22.7802737194 L 114.344270795,22.7803151316 L 114.344836733,22.7800252462 L 114.345896745,22.7798181847 L 114.347352016,22.7799755514 L 114.348421011,22.7789816534 L 114.348807287,22.7776067491 L 114.349768484,22.7765548552 L 114.349777467,22.7753787122 L 114.34949899,22.7742853871 L 114.3498134,22.7729932643 L 114.349894248,22.7706823219 L 114.349651703,22.7696469408 L 114.347594561,22.7692742017 L 114.3473071,22.7680482969 L 114.346857943,22.767915766 L 114.346220139,22.7688766121 L 114.345950644,22.7696635069 L 114.344989447,22.7697877531 L 114.344351643,22.7704255481 L 114.343462311,22.7701770569 L 114.342528063,22.7688931784 L 114.341962124,22.7674850396 L 114.341602798,22.7670377455 L 114.341548899,22.765563321 L 114.341189573,22.762995465 L 114.340578719,22.7622333824 L 114.339590572,22.7618109217 L 114.339087516,22.7609162947 L 114.338207167,22.7599388252 L 114.338395813,22.759764868 L 114.338314964,22.758828809 L 114.338647341,22.7578099055 L 114.338584459,22.756757859 L 114.337093256,22.7567992783 L 114.33673393,22.7563519492 L 114.336239856,22.7560371611 L 114.335727816,22.7560868645 L 114.335009164,22.7566915881 L 114.334317461,22.7561697035 L 114.319333562,22.7585388779 L 114.320276794,22.7609991308 L 114.321363755,22.7621836812 L 114.323097504,22.7628794962 L 114.326124826,22.7677749517 L 114.32607991,22.771916486 L 114.326367371,22.7733328619 L 114.320357642,22.7749894232 L 114.316189459,22.7770932271 L 114.314222148,22.7786917651 L 114.311230759,22.7822035133 L 114.315596571,22.7855909395 L 114.327750777,22.7782113774 L 114.330616402,22.7914131405 L 114.328020271,22.7937320133 L 114.325963129,22.7946926777 L 114.326412287,22.7973675953 L 114.325963129,22.8009119874 L 114.324813286,22.8051022012 L 114.325127696,22.8076609837 L 114.326017028,22.8102859632 z\" /></g></svg>"
],
"text/plain": [
"<shapely.geometry.polygon.Polygon at 0x7f54f4064c50>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"polygons[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"114.24391855752 22.75208199902 0.08129142395999622 0.062398880060001716\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,45.5665628781)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0016258284791999245\" opacity=\"0.6\" d=\"M 114.315596571,22.7855909395 L 114.311230759,22.7822035133 L 114.305921715,22.7860381728 L 114.295582106,22.7921750602 L 114.294477179,22.7919845807 L 114.283517732,22.7887629518 L 114.281748051,22.783677758 L 114.281298893,22.7779794655 L 114.281658219,22.7765051592 L 114.278379369,22.775030837 L 114.275540692,22.7731340732 L 114.269656727,22.7682802257 L 114.266449742,22.7651491539 L 114.265425662,22.7636829925 L 114.264823791,22.7619020408 L 114.264644128,22.7599802435 L 114.264662094,22.7567992783 L 114.2643926,22.7550927925 L 114.246929351,22.7832222347 L 114.247135963,22.7833713152 L 114.247746817,22.7846964684 L 114.248160043,22.7848952402 L 114.249211071,22.7848538295 L 114.249785993,22.7842326662 L 114.250208201,22.784141562 L 114.250720241,22.7842409484 L 114.250971769,22.7845473893 L 114.251402961,22.7847627257 L 114.252049748,22.7847461614 L 114.253855361,22.7839593535 L 114.254241637,22.7843154882 L 114.2544213,22.7842823594 L 114.255121986,22.7836943225 L 114.255643009,22.7846136467 L 114.256622172,22.7848869581 L 114.257080313,22.7854749899 L 114.258158292,22.7861458399 L 114.258391854,22.7865102509 L 114.258499651,22.7873384541 L 114.258706264,22.7876780159 L 114.259344068,22.7881335244 L 114.260413063,22.7882660357 L 114.26071849,22.7884648023 L 114.261733586,22.7896242688 L 114.261967148,22.7902868166 L 114.26237139,22.7906015257 L 114.263045127,22.7907920071 L 114.261859351,22.793028074 L 114.262155795,22.7940135881 L 114.263125975,22.7944607938 L 114.263664964,22.7938976456 L 114.263961408,22.7938231111 L 114.264940572,22.7941957832 L 114.265443629,22.7942371911 L 114.26570414,22.7948003379 L 114.266728219,22.7955456757 L 114.268003827,22.7956202093 L 114.268911126,22.7963075723 L 114.269270452,22.7967878963 L 114.269647744,22.7969203991 L 114.269701643,22.7976574439 L 114.269342317,22.7984027661 L 114.269369266,22.7991398029 L 114.269989104,22.7993385537 L 114.270887419,22.7993551163 L 114.271749802,22.8000341793 L 114.272746932,22.8003240222 L 114.275055602,22.8033300711 L 114.275567642,22.8036364691 L 114.276412058,22.8038103703 L 114.276618671,22.8052429769 L 114.276313244,22.805955134 L 114.276421041,22.8066010408 L 114.277759531,22.8091349532 L 114.278091908,22.8093916541 L 114.278379369,22.8098884932 L 114.278433268,22.8101534733 L 114.278055975,22.8106585903 L 114.277930211,22.8111305831 L 114.278002076,22.8114038413 L 114.278460217,22.8114700856 L 114.278909375,22.8113458775 L 114.280158033,22.8101700346 L 114.280346679,22.8089610588 L 114.281703135,22.8092757247 L 114.282107377,22.8090769884 L 114.282538568,22.8094827414 L 114.282987726,22.8096731965 L 114.283445867,22.8096235126 L 114.283921974,22.8088285676 L 114.284290283,22.8089693395 L 114.285179615,22.8102611214 L 114.285467076,22.80959039 L 114.288314736,22.8080915832 L 114.288009309,22.8074870874 L 114.288215921,22.8067252533 L 114.289204068,22.8057315502 L 114.289024405,22.8055576514 L 114.287596083,22.805325786 L 114.288144056,22.8051850104 L 114.288916607,22.8044976921 L 114.291907997,22.8039345854 L 114.29257275,22.8044231634 L 114.293444116,22.8039180234 L 114.293866324,22.8040008333 L 114.294513111,22.8035122538 L 114.294441246,22.8031478882 L 114.295788719,22.8034460056 L 114.29721704,22.8044811302 L 114.297594333,22.8038931804 L 114.298977738,22.8033880384 L 114.299346047,22.8034542866 L 114.299705374,22.8043651966 L 114.300316228,22.8043983205 L 114.301564886,22.805226415 L 114.301996077,22.8052512578 L 114.302526083,22.8049779872 L 114.303262702,22.8052346959 L 114.303936438,22.8050607965 L 114.304457461,22.8053506288 L 114.305724086,22.8048206493 L 114.306469688,22.8038766184 L 114.306281041,22.8027172732 L 114.306478671,22.8022866567 L 114.306613418,22.8022204079 L 114.306855963,22.8025516516 L 114.307323087,22.8027338353 L 114.307709363,22.8023446244 L 114.307610548,22.8019636935 L 114.307736312,22.8018146333 L 114.308239369,22.8024771218 L 114.309272431,22.802593057 L 114.310422275,22.8041167672 L 114.310592955,22.8056238986 L 114.311643984,22.8062698069 L 114.312039242,22.8074539643 L 114.312030259,22.8081495484 L 114.31217399,22.8085056199 L 114.314132317,22.8081909521 L 114.314491643,22.8074953682 L 114.316530819,22.8071475749 L 114.317015909,22.8068991506 L 114.317357269,22.8064933899 L 114.317698629,22.8064933899 L 114.319531192,22.8074208411 L 114.319908484,22.8073711564 L 114.320339676,22.8066755683 L 114.32012408,22.8062284027 L 114.320231878,22.8029657051 L 114.321444603,22.8017401031 L 114.322109357,22.7999016794 L 114.321956643,22.7994627728 L 114.322199188,22.7951895704 L 114.316935061,22.7934421564 L 114.31132059,22.7890445368 L 114.312775861,22.787297044 L 114.315596571,22.7855909395 z\" /></g></svg>"
],
"text/plain": [
"<shapely.geometry.polygon.Polygon at 0x7f54cb6e0590>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"polygons[1]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"114.310024357604 22.776915145004 0.021888276792012107 0.034998274692000564\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,45.5888285647)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.0006999654938400113\" opacity=\"0.6\" d=\"M 114.326017028,22.8102859632 L 114.325127696,22.8076609837 L 114.324813286,22.8051022012 L 114.325963129,22.8009119874 L 114.326412287,22.7973675953 L 114.325963129,22.7946926777 L 114.328020271,22.7937320133 L 114.330616402,22.7914131405 L 114.327750777,22.7782113774 L 114.312775861,22.787297044 L 114.31132059,22.7890445368 L 114.316935061,22.7934421564 L 114.322199188,22.7951895704 L 114.321956643,22.7994627728 L 114.322109357,22.7999016794 L 114.321444603,22.8017401031 L 114.320231878,22.8029657051 L 114.32012408,22.8062284027 L 114.321157142,22.8070813285 L 114.32177698,22.8073214717 L 114.32328615,22.808381409 L 114.323250217,22.8088120062 L 114.323519712,22.809060427 L 114.323645476,22.8102776826 L 114.323888021,22.8106171873 L 114.325199561,22.8099878608 L 114.326017028,22.8102859632 z\" /></g></svg>"
],
"text/plain": [
"<shapely.geometry.polygon.Polygon at 0x7f54cb6ea790>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"polygons[2]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Now, let's check if any of these points are not within exactly one region...\n",
"\n",
"def point_to_rids(point, polygons=polygons, radius=0.0):\n",
" # in: tuple (lon, lat) and list of Polygons\n",
" RIDs = []\n",
" for rid, polygon in enumerate(polygons):\n",
" if polygon.contains(Point(point)):\n",
" RIDs.append(rid)\n",
" \n",
" return RIDs"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true,
"source_hidden": true
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing ./data/od_part-m-000.pkl\n",
" Processing point 0 of 753817...\n",
" Processing point 10000 of 753817...\n",
" Processing point 20000 of 753817...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-98766354cd59>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mdrop_point\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"dlon\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mii\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"dlat\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mii\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;31m# List of region IDs for pickup, dropoff\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mlist_of_pick_RIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint_to_rids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpick_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mlist_of_drop_RIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint_to_rids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;31m# Number of regions these points belong to should = 1...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-7-28c6872853c9>\u001b[0m in \u001b[0;36mpoint_to_rids\u001b[0;34m(point, polygons, radius)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mRIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mrid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolygon\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpolygons\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mpolygon\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontains\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPoint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpoint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mRIDs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/ds_shapely/lib/python3.7/site-packages/shapely/geometry/point.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mBaseGeometry\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 48\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_coords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 49\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;31m# Coordinate getters and setters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/ds_shapely/lib/python3.7/site-packages/shapely/geometry/point.py\u001b[0m in \u001b[0;36m_set_coords\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 133\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_geom\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ndim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgeos_point_from_py\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 134\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Point() takes at most 3 arguments ({} given)\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/ds_shapely/lib/python3.7/site-packages/shapely/geometry/point.py\u001b[0m in \u001b[0;36mgeos_point_from_py\u001b[0;34m(ob, update_geom, update_ndim)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;31m# Because of a bug in the GEOS C API, always set X before Y\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mlgeos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGEOSCoordSeq_setX\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0mlgeos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGEOSCoordSeq_setY\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# We want to see how many \"bad\" points there are!\n",
"# These are points that correspond to 0 or multiple RIDs.\n",
"\n",
"# For each 12 file, keep a log of each \"bad\" point.\n",
"od_list_of_pick_logs = []\n",
"od_list_of_drop_logs = []\n",
"\n",
"# And let's note how many total IDs there are.\n",
"od_length_of_each_file = [] \n",
"\n",
"for fn in [\"./data/od_part-m-{x}.pkl\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]:\n",
" print(f\"Processing {fn}\")\n",
" df = pd.read_pickle(fn)\n",
" od_length_of_each_file.append(len(df))\n",
" od_log_of_bad_pick_points = []\n",
" od_log_of_bad_drop_points = []\n",
" # Check each pickup and dropoff point:\n",
" for ii in range(len(df)):\n",
" if ii % 10000 == 0:\n",
" print(f\" Processing point {ii} of {len(df)}...\")\n",
" \n",
" # Log points that correspond to 0 or multiple RIDs.\n",
" pick_point = (df[\"plon\"][ii], df[\"plat\"][ii])\n",
" drop_point = (df[\"dlon\"][ii], df[\"dlat\"][ii])\n",
" # List of region IDs for pickup, dropoff\n",
" list_of_pick_RIDs = point_to_rids(pick_point)\n",
" list_of_drop_RIDs = point_to_rids(drop_point)\n",
" # Number of regions these points belong to should = 1...\n",
" if not len(list_of_pick_RIDs) == 1:\n",
" od_log_of_bad_pick_points.append([ii, pick_point, list_of_pick_RIDs])\n",
" if not len(list_of_drop_RIDs) == 1:\n",
" od_log_of_bad_drop_points.append([ii, drop_point, list_of_drop_RIDs])\n",
" \n",
" od_list_of_pick_logs.append(od_log_of_bad_pick_points)\n",
" od_list_of_drop_logs.append(od_log_of_bad_drop_points)\n",
"\n",
"# Seems like we get the exact same issues as before!\n",
"# This is very slow -- over 1.5 minutes per 10,000...\n",
"# How fast is MatLab? Let's go back to that!"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import path\n",
"\n",
"def process_line(line):\n",
" # Not sure what these values are\n",
" val1 = line[0]\n",
" val2 = line[1]\n",
" polyline = [float(v) for v in line[2:]]\n",
" polyline = np.array(list(zip(polyline[0::2], polyline[1::2])))\n",
" return val1, val2, polyline \n",
"\n",
"with open(\"./DataForUConn/DecideRegion/area.txt\") as f:\n",
" area = [process_line(line.split(\",\")) for line in f.read().strip().split(\"\\n\")]\n",
"\n",
"polypaths = [path.Path(line[2][1:]) for line in area]\n",
"\n",
"def point_to_rids(point, polypaths=polypaths, radius=0.0):\n",
" RIDs = []\n",
" for rid, polypath in enumerate(polypaths):\n",
" if polypath.contains_point(point, radius=radius):\n",
" RIDs.append(rid)\n",
" \n",
" return RIDs"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing ./data/od_part-m-000.pkl\n",
" Processing point 0 of 753817...\n",
" Processing point 10000 of 753817...\n",
" Processing point 20000 of 753817...\n",
" Processing point 30000 of 753817...\n",
" Processing point 40000 of 753817...\n",
" Processing point 50000 of 753817...\n",
" Processing point 60000 of 753817...\n",
" Processing point 70000 of 753817...\n",
" Processing point 80000 of 753817...\n",
" Processing point 90000 of 753817...\n",
" Processing point 100000 of 753817...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-f1c3d4460386>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# List of region IDs for pickup, dropoff\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mlist_of_pick_RIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint_to_rids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpick_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mlist_of_drop_RIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpoint_to_rids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdrop_point\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0;31m# Number of regions these points belong to should = 1...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist_of_pick_RIDs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-9-d59d185f8a58>\u001b[0m in \u001b[0;36mpoint_to_rids\u001b[0;34m(point, polypaths, radius)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mRIDs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mrid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolypath\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpolypaths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mpolypath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontains_point\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mradius\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0mRIDs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/ds_shapely/lib/python3.7/site-packages/matplotlib/path.py\u001b[0m in \u001b[0;36mcontains_point\u001b[0;34m(self, point, transform, radius)\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[0mself\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 469\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_path\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpoint_in_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpoint\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoint\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mradius\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 470\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcontains_points\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoints\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mradius\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"od_list_of_pick_logs = []\n",
"od_list_of_drop_logs = []\n",
"\n",
"# And let's note how many total IDs there are.\n",
"od_length_of_each_file = [] \n",
"\n",
"for fn in [\"./data/od_part-m-{x}.pkl\".format(x=str(x).rjust(3,\"0\")) for x in range(12)]:\n",
" print(f\"Processing {fn}\")\n",
" df = pd.read_pickle(fn)\n",
" od_length_of_each_file.append(len(df))\n",
" od_log_of_bad_pick_points = []\n",
" od_log_of_bad_drop_points = []\n",
" # Check each pickup and dropoff point:\n",
" for ii in range(len(df)):\n",
" if ii % 10000 == 0:\n",
" print(f\" Processing point {ii} of {len(df)}...\")\n",
" \n",
" # Log points that correspond to 0 or multiple RIDs.\n",
" pick_point = (df[\"plon\"][ii], df[\"plat\"][ii])\n",
" drop_point = (df[\"dlon\"][ii], df[\"dlat\"][ii])\n",
" # List of region IDs for pickup, dropoff\n",
" list_of_pick_RIDs = point_to_rids(pick_point)\n",
" list_of_drop_RIDs = point_to_rids(drop_point)\n",
" # Number of regions these points belong to should = 1...\n",
" if not len(list_of_pick_RIDs) == 1:\n",
" od_log_of_bad_pick_points.append([ii, pick_point, list_of_pick_RIDs])\n",
" if not len(list_of_drop_RIDs) == 1:\n",
" od_log_of_bad_drop_points.append([ii, drop_point, list_of_drop_RIDs])\n",
" \n",
" od_list_of_pick_logs.append(od_log_of_bad_pick_points)\n",
" od_list_of_drop_logs.append(od_log_of_bad_drop_points)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The above is MUCH faster, less than 30s per 10,000 points. Still quite slow, but not unbearably so.\n",
"# Seems we should use matplotlib and Paths for the processing here.\n",
"# At 30s per 10k, at 750k per 12 files... Oh man. This should take about 7.5 hours to process ...\n",
"\n",
"# Let's split this into twelve separate jobs!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}