Source code for stoqlib.importers.csvimporter

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4

##
## Copyright (C) 2007 Async Open Source
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU Lesser General Public License
## as published by the Free Software Foundation; either version 2
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU Lesser General Public License for more details.
##
## You should have received a copy of the GNU Lesser General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., or visit: http://www.gnu.org/.
##
##
## Author(s): Stoq Team <stoq-devel@async.com.br>
##
##

"""
CSV import classes
"""

import csv
import datetime
import time

from stoqlib.database.runtime import new_store
from stoqlib.importers.importer import Importer
from stoqlib.lib.dateutils import localdate


class CSVRow(object):
    """A row in a CSV file.

    Every column value is stored as an attribute named after the entry of
    ``field_names`` at the same position. The names that were actually
    set are kept, in column order, in ``self.fields``.
    """
    def __init__(self, item, field_names):
        """
        :param item: sequence with the column values of one CSV line
        :param field_names: attribute names, matched to ``item`` by position
        :raises IndexError: if ``item`` has more values than ``field_names``
        """
        self.fields = []
        for i, field in enumerate(item):
            name = field_names[i]
            # Byte strings are decoded as UTF-8 so attributes always hold
            # text; values that are already decoded are stored untouched
            # instead of failing on a second decode.
            if isinstance(field, bytes):
                field = field.decode('utf-8')
            setattr(self, name, field)
            self.fields.append(name)

    def __repr__(self):
        return '<CSV line %s>' % ', '.join(
            '%s=%r' % (f, getattr(self, f)) for f in self.fields)


[docs]class CSVImporter(Importer):
    """Class to assist the process of importing csv files.

    :cvar fields: field names, a list of strings
    :cvar optional_fields: optional field names, a list of strings
    :cvar dialect: optional, csv dialect, defaults to excel
    """
    fields = []
    optional_fields = []
    dialect = 'excel'

    def __init__(self, lines=500, dry=False):
        """Set up a new CSVImporter.

        Both arguments are forwarded to ``Importer.__init__`` (``lines`` as
        its ``items`` argument); ``lines`` is also kept on the instance.

        :param lines: see :meth:`set_lines_per_commit`
        :param dry: see :meth:`set_dry`
        """
        Importer.__init__(self, dry=dry, items=lines)
        self.lines = lines

    #
    # Public API
    #

[docs]    def feed(self, fp, filename='<stdin>'):
        store = new_store()
        self.before_start(store)
        store.commit(close=True)
        self.lineno = 1
        self.rows = list(csv.reader(fp, dialect=self.dialect))

[docs]    def get_n_items(self):
        return len(self.rows)

[docs]    def process_item(self, store, item_no):
        t = time.time()
        item = self.rows[item_no]
        if not item or item[0].startswith('%'):
            self.lineno += 1
            return False
        if len(item) < len(self.fields):
            raise ValueError(
                "line %d in file %s has %d fields, but we need at "
                "least %d fields to be able to process it" % (self.lineno,
                                                              self.filename,
                                                              len(item),
                                                              len(self.fields)))

        field_names = self.fields + self.optional_fields
        if len(item) > len(field_names):
            raise ValueError(
                "line %d in file %s has %d fields, but we can at most "
                "handle %d fields, fields=%r" % (self.lineno,
                                                 self.filename,
                                                 len(item),
                                                 len(field_names),
                                                 item))

        row = CSVRow(item, field_names)
        try:
            self.process_one(row, row.fields, store)
        except Exception:
            print()
            print('Error while processing row %d %r' % (self.lineno, row, ))
            print()
            raise

        if self.items != -1:
            if self.lineno % self.items == 0:
                t2 = time.time()
                print('%s Imported %d entries in %2.2f sec total=%d' % (
                    datetime.datetime.now().strftime('%H:%M:%S'), self.items,
                    t2 - t, self.lineno))
                t = t2

        self.lineno += 1
        return True

[docs]    def parse_date(self, data):
        return localdate(*map(int, data.split('-')))

[docs]    def parse_multi(self, domain_class, field, store):
        if field == '*':
            field_values = store.find(domain_class)
        else:
            items = store.find(domain_class).order_by(domain_class.te_id)
            field_values = [items[int(field_id) - 1]
                            for field_id in field.split('|')]
        return field_values

    #
    # Override this in a subclass
    #

[docs]    def process_one(self, row, fields, store):
        """Processes one line in a csv file, you can access the columns
        using attributes on the data object.
        :param row: object representing a row in the input
        :param fields: a list of fields set in data
        :param store: a store
        """
        raise NotImplementedError

[docs]    def read(self, iterable):
        """This can be overridden by as subclass which wishes to specialize
        the CSV reader.
        :param iterable: a sequence of lines which are going to be read
        :returns: a sequence of parsed items
        """