# Source code for stoqlib.importers.csvimporter

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4

##
## Copyright (C) 2007 Async Open Source
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU Lesser General Public License
## as published by the Free Software Foundation; either version 2
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU Lesser General Public License for more details.
##
## You should have received a copy of the GNU Lesser General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., or visit: http://www.gnu.org/.
##
##
## Author(s): Stoq Team <stoq-devel@async.com.br>
##
##

"""
CSV import classes
"""

import csv
import datetime
import time

from stoqlib.database.runtime import new_store
from stoqlib.importers.importer import Importer
from stoqlib.lib.dateutils import localdate


class CSVRow(object):
    """A row in a CSV file.

    Every column value is stored as an attribute named after the matching
    entry in ``field_names``; the names that were actually set are recorded,
    in column order, in :attr:`fields`.
    """

    def __init__(self, item, field_names):
        """Build a row from one parsed CSV line.

        :param item: sequence with the column values of the line
        :param field_names: attribute names, matched to ``item`` by position
        :raises IndexError: if ``item`` has more values than ``field_names``
        """
        self.fields = []
        for i, field in enumerate(item):
            name = field_names[i]
            # Attribute values are expected to be text.  Byte strings are
            # decoded as UTF-8; values that are already text are stored
            # untouched (the ``unicode`` builtin is gone in Python 3 and
            # refuses already-decoded input in Python 2).
            if isinstance(field, bytes):
                field = field.decode('utf-8')
            setattr(self, name, field)
            self.fields.append(name)

    def __repr__(self):
        return '<CSV line %s>' % ', '.join(
            ['%s=%r' % (f, getattr(self, f)) for f in self.fields])
class CSVImporter(Importer):
    """Class to assist the process of importing csv files.

    Subclasses list the expected columns in :attr:`fields` /
    :attr:`optional_fields` and implement :meth:`process_one`.

    :cvar fields: field names, a list of strings
    :cvar optional_fields: optional field names, a list of strings
    :cvar dialect: optional, csv dialect, defaults to excel
    """
    fields = []
    optional_fields = []
    dialect = 'excel'

    def __init__(self, lines=500, dry=False):
        """
        Create a new CSVImporter object.

        :param lines: see :class:`set_lines_per_commit`
        :param dry: see :class:`set_dry`
        """
        Importer.__init__(self, items=lines, dry=dry)
        self.lines = lines

    #
    # Public API
    #

    def feed(self, fp, filename='<stdin>'):
        """Read all CSV rows from ``fp`` and prepare for processing.

        Runs :meth:`before_start` in a fresh store that is committed and
        closed right away, then resets the line counter and loads every row
        of ``fp`` into ``self.rows``.

        :param fp: iterable of lines, as accepted by :func:`csv.reader`
        :param filename: name of the source, used in error messages
        """
        # process_item() formats self.filename into its error messages, so
        # it has to be remembered here.
        self.filename = filename

        store = new_store()
        self.before_start(store)
        store.commit(close=True)

        self.lineno = 1
        self.rows = list(csv.reader(fp, dialect=self.dialect))
        # Reference point for the progress line printed by process_item()
        self._batch_started = time.time()

    def get_n_items(self):
        """Return the number of rows loaded by :meth:`feed`."""
        return len(self.rows)

    def process_item(self, store, item_no):
        """Validate and process the row at index ``item_no``.

        :param store: a store, handed over to :meth:`process_one`
        :param item_no: index into the rows loaded by :meth:`feed`
        :returns: ``False`` if the row was skipped (empty, or its first
          column starts with ``%``), ``True`` if it was processed
        :raises ValueError: if the row has fewer columns than ``fields`` or
          more than ``fields + optional_fields``
        """
        started = time.time()
        item = self.rows[item_no]
        # Empty rows and rows whose first column starts with '%' are
        # skipped, but still count as a line.
        if not item or item[0].startswith('%'):
            self.lineno += 1
            return False

        if len(item) < len(self.fields):
            raise ValueError(
                "line %d in file %s has %d fields, but we need at "
                "least %d fields to be able to process it" % (self.lineno,
                                                              self.filename,
                                                              len(item),
                                                              len(self.fields)))

        field_names = self.fields + self.optional_fields
        if len(item) > len(field_names):
            raise ValueError(
                "line %d in file %s has %d fields, but we can at most "
                "handle %d fields, fields=%r" % (self.lineno,
                                                 self.filename,
                                                 len(item),
                                                 len(field_names),
                                                 item))

        row = CSVRow(item, field_names)
        try:
            self.process_one(row, row.fields, store)
        except Exception:
            # Show which row failed, then let the error propagate unchanged
            print()
            print('Error while processing row %d %r' % (self.lineno, row, ))
            print()
            raise

        if self.items != -1 and self.lineno % self.items == 0:
            now = time.time()
            # Report the time taken by the whole batch of ``self.items``
            # rows rather than by this single row.  Fall back to this
            # call's start when feed() was overridden and never set the
            # batch reference.
            batch_started = getattr(self, '_batch_started', started)
            print('%s Imported %d entries in %2.2f sec total=%d' % (
                datetime.datetime.now().strftime('%H:%M:%S'), self.items,
                now - batch_started, self.lineno))
            self._batch_started = now

        self.lineno += 1
        return True

    def parse_date(self, data):
        """Convert a dash separated string of integers into a date.

        The integer parts are passed, in order, as positional arguments to
        ``localdate``.

        :param data: string such as ``'2007-01-31'``
        """
        return localdate(*map(int, data.split('-')))

    def parse_multi(self, domain_class, field, store):
        """Resolve a multi-value column into objects of ``domain_class``.

        :param domain_class: class to look up in ``store``
        :param field: either ``'*'`` for every object, or ``|`` separated
          1-based positions into the objects ordered by ``te_id``
        :param store: a store
        :returns: the result of ``store.find`` for ``'*'``, otherwise a list
          with the selected objects
        """
        if field == '*':
            field_values = store.find(domain_class)
        else:
            items = store.find(domain_class).order_by(domain_class.te_id)
            # Positions in the file are 1-based
            field_values = [items[int(field_id) - 1]
                            for field_id in field.split('|')]
        return field_values

    #
    # Override this in a subclass
    #

    def process_one(self, row, fields, store):
        """Processes one line in a csv file, you can access the columns
        using attributes on the row object.

        :param row: object representing a row in the input
        :param fields: a list of fields set in row
        :param store: a store
        """
        raise NotImplementedError

    def read(self, iterable):
        """This can be overridden by a subclass which wishes to specialize
        the CSV reader.

        NOTE(review): :meth:`feed` builds its reader with ``csv.reader``
        directly and does not call this method; the default implementation
        has no body and returns ``None``.

        :param iterable: a sequence of lines which are going to be read
        :returns: a sequence of parsed items
        """