Python convert WKT polygon to row wise points

2.2k Views Asked by At
"POLYGON ((12 13,22 23,16 17,22 24))",101,Something,100000
"POLYGON ((10 12,40 42,46 34,16 24,88 22,33 24,18 20 ))",102,another,200000

How can I get something like below in a csv file:

UID (like 101, 102, etc.) represents a unique identifier for each polygon.

UID#1,County,population,Point#1_Lat,Point#1_Long
UID#1,County,population,Point#2_Lat,Point#2_Long
UID#1,County,population,Point#3_Lat,Point#3_Long
UID#1,County,population,Point#n_Lat,Point#n_Long

UID#2,County,population,Point#1_Lat,Point#1_Long
UID#2,County,population,Point#2_Lat,Point#2_Long
UID#2,County,population,Point#3_Lat,Point#3_Long
UID#2,County,population,Point#n_Lat,Point#n_Long
2

There are 2 best solutions below

4
On

Here is a solution using pyparsing. Let me know if that doesn't work for you - it shouldn't be too difficult to come up with something that only uses the standard library (e.g. re, etc.), but it will definitely be uglier.

import csv
from pyparsing import Group, Literal, OneOrMore, Optional, Word
from pyparsing import delimitedList
from pyparsing import alphas, nums

# Sample input: one record per line, laid out as
# "POLYGON ((a b,a b,...))",uid,county,population
data = """
"POLYGON ((12 13,22 23,16 17,22 24))",101,Something,100000
"POLYGON ((10 12,40 42,46 34,16 24,88 22,33 24,18 20 ))",102,another,200000
"""

def parse_line(line):
    """Parse one input record into its polygon points and attribute fields.

    The expected layout is::

        "POLYGON ((a b,a b,...))",uid,county,population

    Returns the pyparsing result holding four items: a group of two-token
    coordinate pairs, followed by the uid, county and population tokens.
    Every token is returned as a string.

    Raises pyparsing's ParseException (from ``parseString``) when *line*
    does not match the layout above.
    """
    # WKT coordinates may be signed and/or decimal, so accept sign and dot
    # characters in addition to digits.
    coordinate = Word(nums + "+-.")
    point = Group(coordinate + coordinate)
    point_sequence = delimitedList(point, delim=',')

    # Literal matches the exact keyword. Word("POLYGON") would instead accept
    # any run of the letters P, O, L, Y, G, N (e.g. "GONPOLY").
    name = Literal("POLYGON").suppress()
    paren_left = Literal("((").suppress()
    paren_right = Literal("))").suppress()
    quote = Literal('"').suppress()
    polygon = Group(quote + name + paren_left + point_sequence + paren_right + quote)

    uid = Word(nums)
    county = Word(alphas)
    population = Word(nums)
    sep = Literal(",").suppress()
    parser = polygon + sep + uid + sep + county + sep + population

    return parser.parseString(line)

def parse_lines(data, outfile):
    """Write one CSV row per polygon point for every record in *data*.

    *data* is a string holding one record per line (see ``parse_line`` for
    the record layout). *outfile* is the path of the CSV file to create or
    overwrite. Each output row is ``uid,county,population,first,second``
    where first/second are the two coordinates of a point; a blank line is
    written after each polygon.
    """
    with open(outfile, 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        for line in data.splitlines():
            # Skip blank *and* whitespace-only lines; feeding the latter to
            # the parser would raise instead of being ignored.
            if not line.strip():
                continue
            points, uid, county, population = parse_line(line)
            # "lon" rather than "long": the latter shadows a Python 2 builtin.
            writer.writerows(
                [uid, county, population, lat, lon] for lat, lon in points
            )
            # An empty row yields the blank separator line between polygons.
            writer.writerow([])

# Convert the sample records above; change the path to wherever you want the output.
parse_lines(data, r'd:\out.txt')

Result:

101,Something,100000,12,13
101,Something,100000,22,23
101,Something,100000,16,17
101,Something,100000,22,24

102,another,200000,10,12
102,another,200000,40,42
102,another,200000,46,34
102,another,200000,16,24
102,another,200000,88,22
102,another,200000,33,24
102,another,200000,18,20
0
On

Thank you, senshin, for the solution. This was my first attempt at Python. I tried your suggestion, and I also worked out an alternative that gave good results.

Geometry,Zone,Type,UID
"POLYGON(x1 y1,x2 y2,x3 y3,x4 y4)",name1,abc,100
"POLYGON(x1 y1,x2 y2,x3 y3,x4 y4,x5 y5,x6 y6)",name2,pqr,101

import csv
import re
import sys


l_InputFileName = 'D:/Example1.txt'             # make changes here..
l_OutputFileName = 'D:/Example1_o.txt'          # make changes here..


def _polygon_points(wkt):
    """Return the points of a WKT polygon string as a list of 'x,y' strings."""
    text = wkt.strip()
    # Remove the keyword as a real prefix; str.strip('POLYGON') would treat
    # it as a set of characters to trim rather than as a word.
    if text.startswith('POLYGON'):
        text = text[len('POLYGON'):]
    # Drop every bracket so "POLYGON(...)" and "POLYGON ((...))" both work.
    text = text.replace('(', ' ').replace(')', ' ')
    # split() with no argument absorbs stray spaces around a point, so a
    # trailing space never produces an empty extra column.
    return [','.join(point.split()) for point in text.split(',') if point.strip()]


# NOTE: the output file is opened in append mode, so running the script
# again adds another header and another copy of the rows to the same file.
with open(l_OutputFileName, "a+") as fo, open(l_InputFileName, 'r') as csvfile:
    csvR = csv.reader(csvfile, delimiter=',', quotechar='"')

    # The first row holds the column names; an empty file yields no header.
    header = next(csvR, [])
    print(','.join(header))

    # The geometry column is replaced by two coordinate columns; the
    # remaining header columns (index 1 onwards) are copied unchanged.
    fo.write('longitude,latitude,')
    fo.write(','.join(header[1:]) + '\n')

    for row in csvR:
        # csv.reader returns an empty list for a blank line; nothing to emit.
        if not row:
            continue

        # These columns are repeated verbatim on every point's output row.
        strRemainingCols = ','.join(row[1:])

        # One output row per point: x,y followed by the remaining columns.
        for point in _polygon_points(row[0]):
            out_line = point + ',' + strRemainingCols
            print(out_line)
            fo.write(out_line + '\n')