Rudiments
Public Member Functions | Protected Member Functions | List of all members
csvdom Class Reference

#include <csvdom.h>

Inherits csvsax and dom.

Public Member Functions

 csvdom ()
 
 csvdom (bool stringcacheenabled)
 
virtual ~csvdom ()
 
virtual const char * getType ()
 
bool parse (input *in)
 
bool parseFile (const char *filename)
 
bool parseString (const char *string)
 
void createRootNode ()
 
uint64_t getColumnCount ()
 
const char * getColumnName (uint64_t position)
 
bool getColumnPosition (const char *name, uint64_t *position)
 
bool upperCaseColumn (uint64_t position)
 
bool upperCaseColumn (const char *name)
 
bool upperCaseColumns ()
 
bool lowerCaseColumn (uint64_t position)
 
bool lowerCaseColumn (const char *name)
 
bool lowerCaseColumns ()
 
bool rightTrimColumn (uint64_t position)
 
bool rightTrimColumn (const char *name)
 
bool rightTrimColumns ()
 
bool leftTrimColumn (uint64_t position)
 
bool leftTrimColumn (const char *name)
 
bool leftTrimColumns ()
 
bool bothTrimColumn (uint64_t position)
 
bool bothTrimColumn (const char *name)
 
bool bothTrimColumns ()
 
bool renameColumn (uint64_t position, const char *name, bool quoted)
 
bool renameColumn (const char *oldname, const char *newname, bool quoted)
 
bool renameColumn (const char *const *oldnames, const char *newname, bool quoted)
 
bool renameAllColumns (const char *oldname, const char *newname, bool quoted)
 
bool renameAllColumns (const char *const *oldnames, const char *newname, bool quoted)
 
bool insertColumnBefore (uint64_t position, const char *name, bool quoted)
 
bool insertColumnBefore (const char *name, const char *newname, bool quoted)
 
bool insertColumnAfter (uint64_t position, const char *name, bool quoted)
 
bool insertColumnAfter (const char *name, const char *newname, bool quoted)
 
bool moveColumn (uint64_t oldposition, uint64_t newposition)
 
bool moveColumn (const char *name, uint64_t position)
 
bool deleteColumn (uint64_t position)
 
bool deleteColumn (const char *name)
 
bool getColumnIsEmpty (uint64_t position)
 
bool getColumnIsEmpty (const char *name)
 
bool deleteEmptyColumns ()
 
uint64_t getRecordCount ()
 
domnode * getRecord (uint64_t position)
 
const char * getField (uint64_t record, uint64_t column)
 
const char * getField (uint64_t record, const char *column)
 
bool setField (uint64_t record, uint64_t column, const char *value, bool quoted)
 
bool setField (uint64_t record, const char *column, const char *value, bool quoted)
 
bool rightTrimField (uint64_t record, uint64_t column)
 
bool rightTrimField (uint64_t record, const char *column)
 
bool leftTrimField (uint64_t record, uint64_t column)
 
bool leftTrimField (uint64_t record, const char *column)
 
bool bothTrimField (uint64_t record, uint64_t column)
 
bool bothTrimField (uint64_t record, const char *column)
 
void rightTrimFields ()
 
void leftTrimFields ()
 
void bothTrimFields ()
 
bool insertRecordBefore (uint64_t position)
 
bool insertRecordAfter (uint64_t position)
 
bool moveRecord (uint64_t oldposition, uint64_t newposition)
 
bool deleteRecord (uint64_t position)
 
bool getRecordIsEmpty (uint64_t position)
 
bool deleteEmptyRecords ()
 
void carryValueDown (uint64_t position)
 
void carryValueDown (const char *name)
 
void carryAllValuesDown ()
 
- Public Member Functions inherited from csvsax
 csvsax ()
 
virtual ~csvsax ()
 
void setQuote (char quote)
 
char getQuote ()
 
void setDelimiter (char delimiter)
 
char getDelimiter ()
 
- Public Member Functions inherited from sax
 sax ()
 
virtual ~sax ()
 
void setIgnoreHeaderLines (uint64_t lines)
 
uint64_t getIgnoreHeaderLines ()
 
void setIgnoreFooterLines (uint64_t lines)
 
uint64_t getIgnoreFooterLines ()
 
const char * getError ()
 
- Public Member Functions inherited from object
virtual ~object ()
 
- Public Member Functions inherited from dom
 dom ()
 
 dom (bool stringcacheenabled)
 
virtual ~dom ()
 
virtual domnode * getRootNode ()
 
virtual domnode * getNullNode ()
 
virtual ssize_t writeFile (const char *filename, mode_t perms)
 
virtual ssize_t writeFile (const char *filename, mode_t perms, bool indent)
 
virtual ssize_t write ()
 
virtual ssize_t write (bool indent)
 
virtual ssize_t write (output *out)
 
virtual ssize_t write (output *out, bool indent)
 
virtual ssize_t writeXml ()
 
virtual ssize_t writeXml (bool indent)
 
virtual ssize_t writeXml (output *out)
 
virtual ssize_t writeXml (output *out, bool indent)
 
bool getStringCacheEnabled ()
 
- Public Member Functions inherited from collection
 collection ()
 
 collection (collection &c)
 
collection & operator= (collection &c)
 
virtual ~collection ()
 
virtual uint64_t getCount ()=0
 
comparator * getComparator ()
 
void setComparator (comparator *newcomp)
 
virtual bool isReadOnly ()
 
virtual bool isBlockBased ()
 
virtual uint64_t getBlockSize ()
 
virtual bool isSequentialAccess ()
 
virtual void setManageValues (bool manage)
 
virtual bool getManageValues ()
 
virtual void setManageArrayValues (bool manage)
 
virtual bool getManageArrayValues ()
 
virtual void setManageKeys (bool manage)
 
virtual bool getManageKeys ()
 
virtual void setManageArrayKeys (bool manage)
 
virtual bool getManageArrayKeys ()
 
virtual bool clear ()=0
 
virtual ssize_t writeJson ()=0
 
virtual ssize_t writeJson (bool indent)=0
 
virtual ssize_t writeJson (output *out)=0
 
virtual ssize_t writeJson (output *out, bool indent)=0
 

Protected Member Functions

virtual bool headerStart ()
 
virtual bool column (const char *name, bool quoted)
 
virtual bool headerEnd ()
 
virtual bool bodyStart ()
 
virtual bool recordStart ()
 
virtual bool field (const char *value, bool quoted)
 
virtual bool recordEnd ()
 
virtual bool bodyEnd ()
 

Detailed Description

The csvdom class implements a minimal CSV DOM parser.

It parses a file or string of CSV-formatted data and produces a dom tree representing the data. It creates an "h" element for the header, a "c" element for each column, an "r" element for each record, and an "f" element for each field. To each "c" and "f" element, it adds a "v" attribute containing the value, and a "q" attribute containing "y" or "n" to indicate whether or not the value is quoted.

For example, the following CSV:

"col1","col2",col3,col4
field11,field12,"field13","field14"
"field21",field22,field23,"field24"
field31,"field32",field33,field34

would produce the following dom tree:

<h>
  <c v="col1" q="y">
  <c v="col2" q="y">
  <c v="col3" q="n">
  <c v="col4" q="n">
</h>
<r>
  <f v="field11" q="n">
  <f v="field12" q="n">
  <f v="field13" q="y">
  <f v="field14" q="y">
</r>
<r>
  <f v="field21" q="y">
  <f v="field22" q="n">
  <f v="field23" q="n">
  <f v="field24" q="y">
</r>
<r>
  <f v="field31" q="n">
  <f v="field32" q="y">
  <f v="field33" q="n">
  <f v="field34" q="n">
</r>

Constructor & Destructor Documentation

◆ csvdom() [1/2]

csvdom::csvdom ( )

Creates a new instance of the csvdom class.

◆ csvdom() [2/2]

csvdom::csvdom ( bool stringcacheenabled)

Creates a new instance of the csvdom class, allowing the user to enable or disable the "string cache" for this instance.

By default, as each csvdomnode is added to the csvdom tree, tag and attribute names and values are stored in a string cache and pointers into the cache are assigned to each node. Reference counters are kept and the string is removed from the cache when no more nodes are using it. If the data is highly repetitive this conserves memory at the cost of speed.

If the string cache is disabled then memory is allocated in each csvdomnode for names and values and freed when the csvdomnode is freed. This is faster but uses much more memory.

◆ ~csvdom()

virtual csvdom::~csvdom ( )
virtual

Deletes this instance of the csvdom class.

Member Function Documentation

◆ bodyEnd()

virtual bool csvdom::bodyEnd ( )
protected virtual

Gets called when the end of the body is encountered.

Reimplemented from csvsax.

◆ bodyStart()

virtual bool csvdom::bodyStart ( )
protected virtual

Gets called when the start of the body is encountered.

Reimplemented from csvsax.

◆ bothTrimColumn() [1/2]

bool csvdom::bothTrimColumn ( const char * name)

Right-trims and left-trims the name of the column "name". Returns true on success and false if "name" doesn't exist.

◆ bothTrimColumn() [2/2]

bool csvdom::bothTrimColumn ( uint64_t position)

Right-trims and left-trims the name of the column at "position". Returns true on success and false if "position" doesn't exist.

◆ bothTrimColumns()

bool csvdom::bothTrimColumns ( )

Right-trims and left-trims all column names.

◆ bothTrimField() [1/2]

bool csvdom::bothTrimField ( uint64_t record,
const char * column )

Right-trims and left-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ bothTrimField() [2/2]

bool csvdom::bothTrimField ( uint64_t record,
uint64_t column )

Right-trims and left-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ bothTrimFields()

void csvdom::bothTrimFields ( )

Right-trims and left-trims all values.

◆ carryAllValuesDown()

void csvdom::carryAllValuesDown ( )

For each record, for all fields, if the value for the field in that record is empty, then replace it with the value of the field at the same position from the previous record.

◆ carryValueDown() [1/2]

void csvdom::carryValueDown ( const char * name)

For each record, at the field in column "name", if the value for the field in that record is empty, then replace it with the value of the field at the same position from the previous record.

◆ carryValueDown() [2/2]

void csvdom::carryValueDown ( uint64_t position)

For each record, at the field in the column at "position", if the value for the field in that record is empty, then replace it with the value of the field at the same position from the previous record.

◆ column()

virtual bool csvdom::column ( const char * name,
bool quoted )
protected virtual

Gets called when a column name is parsed.

Reimplemented from csvsax.

◆ createRootNode()

void csvdom::createRootNode ( )
virtual

Creates a new root node. This is useful for building a tree from scratch.

Reimplemented from dom.

◆ deleteColumn() [1/2]

bool csvdom::deleteColumn ( const char * name)

Deletes the column named "name". Returns true on success and false if "name" doesn't exist.

◆ deleteColumn() [2/2]

bool csvdom::deleteColumn ( uint64_t position)

Deletes the column at "position". Returns true on success and false if "position" doesn't exist.

◆ deleteEmptyColumns()

bool csvdom::deleteEmptyColumns ( )

Deletes columns composed entirely of empty fields. Returns true on success and false on failure.

◆ deleteEmptyRecords()

bool csvdom::deleteEmptyRecords ( )

Deletes records composed entirely of empty fields. Returns true on success and false on failure.

◆ deleteRecord()

bool csvdom::deleteRecord ( uint64_t position)

Deletes the record at "position". Returns true on success and false if "position" doesn't exist.

◆ field()

virtual bool csvdom::field ( const char * value,
bool quoted )
protected virtual

Gets called when a field is parsed.

Reimplemented from csvsax.

◆ getColumnCount()

uint64_t csvdom::getColumnCount ( )

Returns the number of columns in the CSV.

◆ getColumnIsEmpty() [1/2]

bool csvdom::getColumnIsEmpty ( const char * name)

Returns true if the column "name" is composed entirely of empty fields.

◆ getColumnIsEmpty() [2/2]

bool csvdom::getColumnIsEmpty ( uint64_t position)

Returns true if the column at "position" is composed entirely of empty fields.

◆ getColumnName()

const char * csvdom::getColumnName ( uint64_t position)

Returns the column name of the column at "position". Returns NULL if "position" doesn't exist.

◆ getColumnPosition()

bool csvdom::getColumnPosition ( const char * name,
uint64_t * position )

Sets "position" to the position of the column named "name". Returns true on success and false if "name" doesn't exist.

◆ getField() [1/2]

const char * csvdom::getField ( uint64_t record,
const char * column )

Returns the value at "record","column" or NULL if no such record/column exists.

◆ getField() [2/2]

const char * csvdom::getField ( uint64_t record,
uint64_t column )

Returns the value at "record","column" or NULL if no such record/column exists.

◆ getRecord()

domnode * csvdom::getRecord ( uint64_t position)

Returns the record at "position".

◆ getRecordCount()

uint64_t csvdom::getRecordCount ( )

Returns the number of records in the CSV.

◆ getRecordIsEmpty()

bool csvdom::getRecordIsEmpty ( uint64_t position)

Returns true if the record at "position" is composed entirely of empty fields.

◆ getType()

virtual const char * csvdom::getType ( )
virtual

Returns "csvdom".

Reimplemented from dom.

◆ headerEnd()

virtual bool csvdom::headerEnd ( )
protected virtual

Gets called when the end of the header is encountered.

Reimplemented from csvsax.

◆ headerStart()

virtual bool csvdom::headerStart ( )
protected virtual

Gets called when the start of the header is encountered.

Reimplemented from csvsax.

◆ insertColumnAfter() [1/2]

bool csvdom::insertColumnAfter ( const char * name,
const char * newname,
bool quoted )

Inserts a column after the column named "name" and names it "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. If "newname" is null or empty then the column name will be set to an empty string. Returns true on success and false if "name" doesn't exist.

◆ insertColumnAfter() [2/2]

bool csvdom::insertColumnAfter ( uint64_t position,
const char * name,
bool quoted )

Inserts a column after "position" and names it "name". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. If "name" is null or empty then the column name will be set to an empty string. Returns true on success and false if "position" doesn't exist.

◆ insertColumnBefore() [1/2]

bool csvdom::insertColumnBefore ( const char * name,
const char * newname,
bool quoted )

Inserts a column before the column named "name" and names it "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. If "newname" is null or empty then the column name will be set to an empty string. Returns true on success and false if "name" doesn't exist.

◆ insertColumnBefore() [2/2]

bool csvdom::insertColumnBefore ( uint64_t position,
const char * name,
bool quoted )

Inserts a column before "position" and names it "name". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. If "name" is null or empty then the column name will be set to an empty string. Returns true on success and false if "position" doesn't exist.

◆ insertRecordAfter()

bool csvdom::insertRecordAfter ( uint64_t position)

Inserts a record after "position". Returns true on success and false if "position" doesn't exist.

◆ insertRecordBefore()

bool csvdom::insertRecordBefore ( uint64_t position)

Inserts a record before "position". Returns true on success and false if "position" doesn't exist.

◆ leftTrimColumn() [1/2]

bool csvdom::leftTrimColumn ( const char * name)

Left-trims the name of the column "name". Returns true on success and false if "name" doesn't exist.

◆ leftTrimColumn() [2/2]

bool csvdom::leftTrimColumn ( uint64_t position)

Left-trims the name of the column at "position". Returns true on success and false if "position" doesn't exist.

◆ leftTrimColumns()

bool csvdom::leftTrimColumns ( )

Left-trims all column names.

◆ leftTrimField() [1/2]

bool csvdom::leftTrimField ( uint64_t record,
const char * column )

Left-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ leftTrimField() [2/2]

bool csvdom::leftTrimField ( uint64_t record,
uint64_t column )

Left-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ leftTrimFields()

void csvdom::leftTrimFields ( )

Left-trims all values.

◆ lowerCaseColumn() [1/2]

bool csvdom::lowerCaseColumn ( const char * name)

Lower-cases the name of the column "name". Returns true on success and false if "name" doesn't exist.

◆ lowerCaseColumn() [2/2]

bool csvdom::lowerCaseColumn ( uint64_t position)

Lower-cases the name of the column at "position". Returns true on success and false if "position" doesn't exist.

◆ lowerCaseColumns()

bool csvdom::lowerCaseColumns ( )

Lower-cases all column names.

◆ moveColumn() [1/2]

bool csvdom::moveColumn ( const char * name,
uint64_t position )

Moves the column named "name" to "position". Returns true on success and false if "name" doesn't exist or if "position" is greater than one more than the last position.

◆ moveColumn() [2/2]

bool csvdom::moveColumn ( uint64_t oldposition,
uint64_t newposition )

Moves column at "oldposition" to "newposition". Returns true on success and false if "oldposition" doesn't exist or if "newposition" is greater than one more than the last position.

◆ moveRecord()

bool csvdom::moveRecord ( uint64_t oldposition,
uint64_t newposition )

Moves record at "oldposition" to "newposition". Returns true on success and false if "oldposition" doesn't exist or if "newposition" is greater than one more than the last position.

◆ parse()

bool csvdom::parse ( input * in)
virtual

Parses generic input "in" and generates a DOM tree.

Reimplemented from sax.

◆ parseFile()

bool csvdom::parseFile ( const char * filename)
virtual

Parses file "filename" and generates a DOM tree.

Reimplemented from sax.

◆ parseString()

bool csvdom::parseString ( const char * string)
virtual

Parses string "string" and generates a DOM tree.

Reimplemented from sax.

◆ recordEnd()

virtual bool csvdom::recordEnd ( )
protected virtual

Gets called when the end of a record is encountered.

Reimplemented from csvsax.

◆ recordStart()

virtual bool csvdom::recordStart ( )
protected virtual

Gets called when the start of a record is encountered.

Reimplemented from csvsax.

◆ renameAllColumns() [1/2]

bool csvdom::renameAllColumns ( const char *const * oldnames,
const char * newname,
bool quoted )

Renames all columns whose name matches any of the names in the NULL-terminated array "oldnames" to "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if none of the names in "oldnames" exist.

◆ renameAllColumns() [2/2]

bool csvdom::renameAllColumns ( const char * oldname,
const char * newname,
bool quoted )

Renames all columns named "oldname" to "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if "oldname" doesn't exist.

◆ renameColumn() [1/3]

bool csvdom::renameColumn ( const char *const * oldnames,
const char * newname,
bool quoted )

Renames the first column whose name matches any of the names in the NULL-terminated array "oldnames" to "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if none of the names in "oldnames" exist.

◆ renameColumn() [2/3]

bool csvdom::renameColumn ( const char * oldname,
const char * newname,
bool quoted )

Renames the first column named "oldname" to "newname". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if "oldname" doesn't exist.

◆ renameColumn() [3/3]

bool csvdom::renameColumn ( uint64_t position,
const char * name,
bool quoted )

Renames the column at "position" to "name". Sets the column name to be quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if "position" doesn't exist.

◆ rightTrimColumn() [1/2]

bool csvdom::rightTrimColumn ( const char * name)

Right-trims the name of the column "name". Returns true on success and false if "name" doesn't exist.

◆ rightTrimColumn() [2/2]

bool csvdom::rightTrimColumn ( uint64_t position)

Right-trims the name of the column at "position". Returns true on success and false if "position" doesn't exist.

◆ rightTrimColumns()

bool csvdom::rightTrimColumns ( )

Right-trims all column names.

◆ rightTrimField() [1/2]

bool csvdom::rightTrimField ( uint64_t record,
const char * column )

Right-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ rightTrimField() [2/2]

bool csvdom::rightTrimField ( uint64_t record,
uint64_t column )

Right-trims the value at "record","column". Returns true on success and false if no such record/column exists.

◆ rightTrimFields()

void csvdom::rightTrimFields ( )

Right-trims all values.

◆ setField() [1/2]

bool csvdom::setField ( uint64_t record,
const char * column,
const char * value,
bool quoted )

Sets the value at "record","column" to "value" and marks it as quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if no such record/column exists.

◆ setField() [2/2]

bool csvdom::setField ( uint64_t record,
uint64_t column,
const char * value,
bool quoted )

Sets the value at "record","column" to "value" and marks it as quoted if "quoted" is true, or unquoted otherwise. Returns true on success and false if no such record/column exists.

◆ upperCaseColumn() [1/2]

bool csvdom::upperCaseColumn ( const char * name)

Upper-cases the name of the column "name". Returns true on success and false if "name" doesn't exist.

◆ upperCaseColumn() [2/2]

bool csvdom::upperCaseColumn ( uint64_t position)

Upper-cases the name of the column at "position". Returns true on success and false if "position" doesn't exist.

◆ upperCaseColumns()

bool csvdom::upperCaseColumns ( )

Upper-cases all column names.