Source code for pygrass.vector.table

"""
Created on Wed Aug  8 15:29:21 2012

@author: pietro
"""

import ctypes
import os
from collections import OrderedDict

import numpy as np
from sqlite3 import OperationalError

import grass.lib.vector as libvect
from grass.pygrass.gis import Mapset
from grass.exceptions import DBError
from grass.pygrass.utils import table_exist, decode
from grass.script.db import db_table_in_vector
from grass.script.core import warning

from grass.pygrass.vector import sql
from grass.lib.ctypes_preamble import ReturnString


# For test purposes
test_vector_name = "table_doctest_map"

DRIVERS = ("sqlite", "pg")
UNSUPPORTED_DRIVERS = ("ogr", "dbf")


[docs]def get_path(path, vect_name=None): """Return the full path to the database; replacing environment variable with real values :param path: The path with substitutional parameter :param vect_name: The name of the vector map >>> from grass.script.core import gisenv >>> import os >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> new_path = get_path(path) >>> new_path2 = os.path.join( ... gisenv()["GISDBASE"], ... gisenv()["LOCATION_NAME"], ... gisenv()["MAPSET"], ... "sqlite", ... "sqlite.db", ... ) >>> new_path.replace("//", "/") == new_path2.replace("//", "/") True >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/vector/$MAP/sqlite.db" >>> new_path = get_path(path, "test") >>> new_path2 = os.path.join( ... gisenv()["GISDBASE"], ... gisenv()["LOCATION_NAME"], ... gisenv()["MAPSET"], ... "vector", ... "test", ... "sqlite.db", ... ) >>> new_path.replace("//", "/") == new_path2.replace("//", "/") True """ if "$" not in path: return path mapset = Mapset() path = path.replace("$GISDBASE", mapset.gisdbase) path = path.replace("$LOCATION_NAME", mapset.location) path = path.replace("$MAPSET", mapset.name) if vect_name is not None: path = path.replace("$MAP", vect_name) return path
[docs]class Filters: """Help user to build a simple sql query. >>> filter = Filters("table") >>> filter.get_sql() 'SELECT * FROM table;' >>> filter.where("area<10000").get_sql() 'SELECT * FROM table WHERE area<10000;' >>> filter.select("cat", "area").get_sql() 'SELECT cat, area FROM table WHERE area<10000;' >>> filter.order_by("area").limit(10).get_sql() 'SELECT cat, area FROM table WHERE area<10000 ORDER BY area LIMIT 10;' """ def __init__(self, tname): self.tname = tname self._select = None self._where = None self._orderby = None self._limit = None self._groupby = None def __repr__(self): return "Filters(%r)" % self.get_sql()
[docs] def select(self, *args): """Create the select query""" cols = ", ".join(args) if args else "*" select = sql.SELECT[:-1] self._select = select.format(cols=cols, tname=self.tname) return self
[docs] def where(self, condition): """Create the where condition :param condition: the condition of where statement, for example `cat = 1` :type condition: str """ self._where = "WHERE {condition}".format(condition=condition) return self
[docs] def order_by(self, *orderby): """Create the order by condition :param orderby: the name of column/s to order the result :type orderby: str """ self._orderby = "ORDER BY {orderby}".format(orderby=", ".join(orderby)) return self
[docs] def limit(self, number): """Create the limit condition :param number: the number to limit the result :type number: int """ if not isinstance(number, int): raise ValueError("Must be an integer.") self._limit = "LIMIT {number}".format(number=number) return self
[docs] def group_by(self, *groupby): """Create the group by condition :param groupby: the name of column/s to group the result :type groupby: str, list """ self._groupby = "GROUP BY {groupby}".format(groupby=", ".join(groupby)) return self
[docs] def get_sql(self): """Return the SQL query""" sql_list = [] if self._select is None: self.select() sql_list.append(self._select) if self._where is not None: sql_list.append(self._where) if self._groupby is not None: sql_list.append(self._groupby) if self._orderby is not None: sql_list.append(self._orderby) if self._limit is not None: sql_list.append(self._limit) return "%s;" % " ".join(sql_list)
[docs] def reset(self): """Clean internal variables""" self._select = None self._where = None self._orderby = None self._limit = None self._groupby = None
[docs]class Columns: """Object to work with columns table. It is possible to instantiate a Columns object given the table name and the database connection. For a sqlite table: >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.tname 'table_doctest_map' For a postgreSQL table: >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.tname # doctest: +SKIP 'table_doctest_map' #doctest: +SKIP """ def __init__(self, tname, connection, key="cat"): self.tname = tname self.conn = connection self.key = key self.odict = None self.update_odict() def __contains__(self, item): return item in self.names() def __repr__(self): return "Columns(%r)" % list(self.items()) def __getitem__(self, key): return self.odict[key] def __setitem__(self, name, new_type): self.cast(name, new_type) self.update_odict(self) def __iter__(self): return self.odict.__iter__() def __len__(self): return self.odict.__len__() def __eq__(self, obj): """Return True if two table have the same columns. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> connection = sqlite3.connect(get_path(path)) >>> cols0 = Columns(test_vector_name, connection) >>> cols1 = Columns(test_vector_name, connection) >>> cols0 == cols1 True """ return obj.tname == self.tname and obj.odict == self.odict def __ne__(self, other): return not self == other # Restore Python 2 hashing behaviour on Python 3 __hash__ = object.__hash__
[docs] def is_pg(self): """Return True if is a psycopg connection. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.is_pg() False >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.is_pg() # doctest: +SKIP True """ return hasattr(self.conn, "xid")
[docs] def update_odict(self): """Read columns name and types from table and update the odict attribute. """ if self.is_pg(): # is a postgres connection cur = self.conn.cursor() cur.execute("SELECT oid,typname FROM pg_type") diz = dict(cur.fetchall()) odict = OrderedDict() import psycopg2 as pg try: cur.execute(sql.SELECT.format(cols="*", tname=self.tname)) descr = cur.description for column in descr: name, ctype = column[:2] odict[name] = diz[ctype] except pg.ProgrammingError: pass self.odict = odict else: # is a sqlite connection cur = self.conn.cursor() cur.execute(sql.PRAGMA.format(tname=self.tname)) descr = cur.fetchall() odict = OrderedDict() for column in descr: name, ctype = column[1:3] odict[name] = ctype self.odict = odict values = ",".join( [ "?", ] * (len(self)) ) kv = ",".join(["%s=?" % k for k in self.odict.keys() if k != self.key]) where = "%s=?" % self.key self.insert_str = sql.INSERT.format(tname=self.tname, values=values) self.update_str = sql.UPDATE_WHERE.format( tname=self.tname, values=kv, condition=where )
[docs] def sql_descr(self, remove=None): """Return a string with description of columns. Remove it is used to remove a columns. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.sql_descr() # doctest: +ELLIPSIS 'cat INTEGER, name varchar(50), value double precision' >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.sql_descr() # doctest: +ELLIPSIS +SKIP 'cat INTEGER, name varchar(50), value double precision' """ if remove: return ", ".join( ["%s %s" % (key, val) for key, val in self.items() if key != remove] ) return ", ".join(["%s %s" % (key, val) for key, val in self.items()])
[docs] def types(self): """Return a list with the column types. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.types() # doctest: +ELLIPSIS ['INTEGER', 'varchar(50)', 'double precision'] >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.types() # doctest: +ELLIPSIS +SKIP ['INTEGER', 'varchar(50)', 'double precision'] """ return list(self.odict.values())
[docs] def names(self, remove=None, unicod=True): """Return a list with the column names. Remove it is used to remove a columns. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.names() # doctest: +ELLIPSIS ['cat', 'name', 'value'] >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, # doctest: +SKIP ... pg.connect("host=localhost dbname=grassdb"), ... ) >>> cols_pg.names() # doctest: +ELLIPSIS +SKIP ['cat', 'name', 'value'] """ if remove: nams = list(self.odict.keys()) nams.remove(remove) else: nams = list(self.odict.keys()) if unicod: return nams return [str(name) for name in nams]
[docs] def items(self): """Return a list of tuple with column name and column type. >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> cols_sqlite = Columns(test_vector_name, sqlite3.connect(get_path(path))) >>> cols_sqlite.items() # doctest: +ELLIPSIS [('cat', 'INTEGER'), ('name', 'varchar(50)'), ('value', 'double precision')] >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.items() # doctest: +ELLIPSIS +SKIP [('cat', 'INTEGER'), ('name', 'varchar(50)'), ('value', 'double precision')] """ return list(self.odict.items())
[docs] def add(self, col_name, col_type): """Add a new column to the table. :param col_name: the name of column to add :type col_name: str :param col_type: the tipe of column to add :type col_type: str >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> from grass.pygrass.utils import copy, remove >>> copy(test_vector_name, "mycensus", "vect") >>> cols_sqlite = Columns("mycensus", sqlite3.connect(get_path(path))) >>> cols_sqlite.add(["n_pizza"], ["INT"]) >>> "n_pizza" in cols_sqlite True >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... "boundary_municp_pg", pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.add("n_pizza", "INT") # doctest: +SKIP >>> "n_pizza" in cols_pg # doctest: +SKIP True >>> remove("mycensus", "vect") """ def check(col_type): """Check the column type if it is supported by GRASS :param col_type: the type of column :type col_type: str """ valid_type = ( "DOUBLE PRECISION", "DOUBLE", "INT", "INTEGER", "DATE", "VARCHAR", ) col = col_type.upper() valid = [col.startswith(tp) for tp in valid_type] if not any(valid): str_err = "Type: %r is not supported.\nSupported types are: %s" raise TypeError(str_err % (col_type, ", ".join(valid_type))) return col_type col_type = ( [ check(col_type), ] if isinstance(col_type, str) else [check(col) for col in col_type] ) col_name = ( [ col_name, ] if isinstance(col_name, str) else col_name ) sqlcode = [ sql.ADD_COL.format(tname=self.tname, cname=cn, ctype=ct) for cn, ct in zip(col_name, col_type) ] cur = self.conn.cursor() cur.executescript("\n".join(sqlcode)) self.conn.commit() cur.close() self.update_odict()
[docs] def rename(self, old_name, new_name): """Rename a column of the table. :param old_name: the name of existing column :type old_name: str :param new_name: the name of new column :type new_name: str >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> from grass.pygrass.utils import copy, remove >>> copy(test_vector_name, "mycensus", "vect") >>> cols_sqlite = Columns("mycensus", sqlite3.connect(get_path(path))) >>> cols_sqlite.add(["n_pizza"], ["INT"]) >>> "n_pizza" in cols_sqlite True >>> cols_sqlite.rename("n_pizza", "n_pizzas") # doctest: +ELLIPSIS >>> "n_pizza" in cols_sqlite False >>> "n_pizzas" in cols_sqlite True >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.rename("n_pizza", "n_pizzas") # doctest: +SKIP >>> "n_pizza" in cols_pg # doctest: +SKIP False >>> "n_pizzas" in cols_pg # doctest: +SKIP True >>> remove("mycensus", "vect") """ cur = self.conn.cursor() if self.is_pg(): cur.execute( sql.RENAME_COL.format( tname=self.tname, old_name=old_name, new_name=new_name ) ) self.conn.commit() cur.close() self.update_odict() else: cur.execute( sql.ADD_COL.format( tname=self.tname, cname=new_name, ctype=str(self.odict[old_name]) ) ) cur.execute( sql.UPDATE.format(tname=self.tname, new_col=new_name, old_col=old_name) ) self.conn.commit() cur.close() self.update_odict() self.drop(old_name)
[docs] def cast(self, col_name, new_type): """Change the column type. :param col_name: the name of column :type col_name: str :param new_type: the new type of column :type new_type: str >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> from grass.pygrass.utils import copy, remove >>> copy(test_vector_name, "mycensus", "vect") >>> cols_sqlite = Columns("mycensus", sqlite3.connect(get_path(path))) >>> cols_sqlite.add(["n_pizzas"], ["INT"]) >>> cols_sqlite.cast("n_pizzas", "float8") # doctest: +ELLIPSIS Traceback (most recent call last): ... grass.exceptions.DBError: SQLite does not support to cast columns. >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.cast("n_pizzas", "float8") # doctest: +SKIP >>> cols_pg["n_pizzas"] # doctest: +SKIP 'float8' >>> remove("mycensus", "vect") .. warning :: It is not possible to cast a column with sqlite """ if self.is_pg(): cur = self.conn.cursor() cur.execute( sql.CAST_COL.format(tname=self.tname, col=col_name, ctype=new_type) ) self.conn.commit() cur.close() self.update_odict() else: # sqlite does not support rename columns: raise DBError("SQLite does not support to cast columns.")
[docs] def drop(self, col_name): """Drop a column from the table. :param col_name: the name of column to remove :type col_name: str >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> from grass.pygrass.utils import copy, remove >>> copy(test_vector_name, "mycensus", "vect") >>> cols_sqlite = Columns("mycensus", sqlite3.connect(get_path(path))) >>> cols_sqlite.drop("name") # doctest: +ELLIPSIS >>> "name" in cols_sqlite False >>> import psycopg2 as pg # doctest: +SKIP >>> cols_pg = Columns( ... test_vector_name, pg.connect("host=localhost dbname=grassdb") ... ) # doctest: +SKIP >>> cols_pg.drop("name") # doctest: +SKIP >>> "name" in cols_pg # doctest: +SKIP False >>> remove("mycensus", "vect") """ cur = self.conn.cursor() if self.is_pg(): cur.execute(sql.DROP_COL.format(tname=self.tname, cname=col_name)) else: desc = str(self.sql_descr(remove=col_name)) names = ", ".join(self.names(remove=col_name, unicod=False)) queries = sql.DROP_COL_SQLITE.format( tname=self.tname, keycol=self.key, coldef=desc, colnames=names ).split("\n") for query in queries: cur.execute(query) self.conn.commit() cur.close() self.update_odict()
[docs]class Table: """ >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/PERMANENT/sqlite/sqlite.db" >>> tab_sqlite = Table( ... name=test_vector_name, connection=sqlite3.connect(get_path(path)) ... ) >>> tab_sqlite.name 'table_doctest_map' >>> import psycopg2 # doctest: +SKIP >>> tab_pg = Table( ... test_vector_name, psycopg2.connect("host=localhost dbname=grassdb", "pg") ... ) # doctest: +SKIP >>> tab_pg.columns # doctest: +ELLIPSIS +SKIP Columns([('cat', 'int4'), ...]) """ def _get_name(self): """Private method to return the name of table""" return self._name def _set_name(self, new_name): """Private method to set the name of table :param new_name: the new name of table :type new_name: str """ old_name = self._name cur = self.conn.cursor() cur.execute(sql.RENAME_TAB.format(old_name=old_name, new_name=new_name)) self.conn.commit() cur.close() name = property(fget=_get_name, fset=_set_name, doc="Set and obtain table name") def __init__(self, name, connection, key="cat"): self._name = name self.conn = connection self.key = key self.columns = Columns(self.name, self.conn, self.key) self.filters = Filters(self.name) def __repr__(self): """ >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/PERMANENT/sqlite/sqlite.db" >>> tab_sqlite = Table( ... name=test_vector_name, connection=sqlite3.connect(get_path(path)) ... ) >>> tab_sqlite Table('table_doctest_map') """ return "Table(%r)" % (self.name) def __iter__(self): cur = self.execute() return (cur.fetchone() for _ in range(self.__len__())) def __len__(self): """Return the number of rows""" return self.n_rows()
[docs] def drop(self, cursor=None, force=False): """Method to drop table from database :param cursor: the cursor to connect, if None it use the cursor of connection table object :type cursor: Cursor object :param force: True to remove the table, by default False to print advice :type force: bool """ cur = cursor or self.conn.cursor() if self.exist(cursor=cur): used = db_table_in_vector(self.name) if used is not None and len(used) > 0 and not force: print( _("Deleting table <%s> which is attached to following map(s):") % self.name ) for vect in used: warning("%s" % vect) print(_("You must use the force flag to actually remove it. Exiting.")) else: cur.execute(sql.DROP_TAB.format(tname=self.name))
[docs] def n_rows(self): """Return the number of rows >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> tab_sqlite = Table( ... name=test_vector_name, connection=sqlite3.connect(get_path(path)) ... ) >>> tab_sqlite.n_rows() 3 """ cur = self.conn.cursor() cur.execute(sql.SELECT.format(cols="Count(*)", tname=self.name)) number = cur.fetchone()[0] cur.close() return number
[docs] def execute(self, sql_code=None, cursor=None, many=False, values=None): """Execute SQL code from a given string or build with filters and return a cursor object. :param sql_code: the SQL code to execute, if not pass it use filters variable :type sql_code: str :param cursor: the cursor to connect, if None it use the cursor of connection table object :type cursor: Cursor object :param many: True to run executemany function :type many: bool :param values: The values to substitute into sql_code string :type values: list of tuple >>> import sqlite3 >>> path = "$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db" >>> tab_sqlite = Table( ... name=test_vector_name, connection=sqlite3.connect(get_path(path)) ... ) >>> tab_sqlite.filters.select("cat", "name").order_by("value") Filters('SELECT cat, name FROM table_doctest_map ORDER BY value;') >>> cur = tab_sqlite.execute() >>> cur.fetchone() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE (1, 'point') """ try: sqlc = sql_code or self.filters.get_sql() cur = cursor or self.conn.cursor() if many and values: return cur.executemany(sqlc, values) return cur.execute(sqlc, values) if values else cur.execute(sqlc) except Exception as exc: raise ValueError( "The SQL statement is not correct:\n%r,\n" "values: %r,\n" "SQL error: %s" % (sqlc, values, str(exc)) )
[docs] def exist(self, cursor=None): """Return True if the table already exist in the DB, False otherwise :param cursor: the cursor to connect, if None it use the cursor of connection table object """ cur = cursor or self.conn.cursor() return table_exist(cur, self.name)
[docs] def insert(self, values, cursor=None, many=False): """Insert a new row :param values: a tuple of values to insert, it is possible to insert more rows using a list of tuple and parameter `many` :type values: tuple :param cursor: the cursor to connect, if None it use the cursor of connection table object :type cursor: Cursor object :param many: True to run executemany function :type many: bool """ cur = cursor or self.conn.cursor() if many: return cur.executemany(self.columns.insert_str, values) return cur.execute(self.columns.insert_str, values)
[docs] def update(self, key, values, cursor=None): """Update a table row :param key: the rowid :type key: int :param values: the values to insert without row id. For example if we have a table with four columns: cat, c0, c1, c2 the values list should containing only c0, c1, c2 values. :type values: list :param cursor: the cursor to connect, if None it use the cursor of connection table object :type cursor: Cursor object """ cur = cursor or self.conn.cursor() vals = list(values) + [ key, ] return cur.execute(self.columns.update_str, vals)
[docs] def create(self, cols, name=None, overwrite=False, cursor=None): """Create a new table :param cols: :type cols: :param name: the name of table to create, None for the name of Table object :type name: str :param overwrite: overwrite existing table :type overwrite: bool :param cursor: the cursor to connect, if None it use the cursor of connection table object :type cursor: Cursor object """ cur = cursor or self.conn.cursor() coldef = ",\n".join(["%s %s" % col for col in cols]) newname = name or self.name try: cur.execute(sql.CREATE_TAB.format(tname=newname, coldef=coldef)) self.conn.commit() except OperationalError: # OperationalError if overwrite: self.drop(force=True) cur.execute(sql.CREATE_TAB.format(tname=newname, coldef=coldef)) self.conn.commit() else: print("The table: %s already exist." % self.name) cur.close() self.columns.update_odict()
if __name__ == "__main__": import doctest from grass.pygrass import utils utils.create_test_vector_map(test_vector_name) doctest.testmod() """Remove the generated vector map, if exist""" from grass.pygrass.utils import get_mapset_vector from grass.script.core import run_command mset = get_mapset_vector(test_vector_name, mapset="") if mset: run_command("g.remove", flags="f", type="vector", name=test_vector_name)