Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import parsecsv, matrix, strutils
- export matrix
type
  # Element kinds a column can be tagged with.
  DType = enum f32, f64, i32, i64, str

  # One named column. `dtype` selects which of the typed sequences is
  # active; `dtypeCounts` holds one counter per `DType`.
  DFCol = object
    name: string
    dtypeCounts: array[DType, int]
    case dtype: DType
    of f32: f32s: seq[float32]
    of f64: f64s: seq[float64]
    of i32: i32s: seq[int32]
    of i64: i64s: seq[int64]
    of str: strs: seq[string]

  # A data frame: an ordered list of columns.
  DF = object
    cols: seq[DFCol]

  # Raised by `dfAssert`. Derives from `CatchableError` (itself an
  # `Exception`) so that it is a recoverable error which
  # `except CatchableError` handlers can intercept.
  DFError = object of CatchableError

  # Options describing how a CSV file is laid out.
  CsvParams = object
    sep*, quote*, escape*: char
    skipInitialSpace*, hasHeader*: bool

  # CsvInfo = object
  #   rows*, columns*: int
  #   header*: seq[string]
  # Csv = object
  #   params: CsvParams
  #   info: CsvInfo
  #   rows: seq[seq[string]]
template dfAssert(cond, msg) =
  ## Raises `DFError` with message `msg` when `cond` evaluates to false.
  ## The raise statement is wrapped in a `line` pragma fed with
  ## `instantiationInfo()`, i.e. the location where the template is used.
  if not cond:
    {.line: instantiationInfo().}:
      raise newException(DFError, msg)
# Axis selectors. NOTE(review): nothing in the visible code uses them yet;
# their exact meaning should be confirmed against callers.
const axisCol* = 0
const axisRow* = 1
proc dtypeOf*(T: typedesc): DType =
  ## Maps a Nim type to the matching `DType` tag. The branch is selected
  ## at compile time; float32, float64, int32, int64 and string are
  ## supported. Any other type reaches the failing static assertion, so
  ## compilation stops.
  when T is float32:
    result = f32
  elif T is float64:
    result = f64
  elif T is int32:
    result = i32
  elif T is int64:
    result = i64
  elif T is string:
    result = str
  else:
    static:
      doAssert false
proc len*(col: DFCol): int =
  ## Number of elements held in the sequence selected by `col.dtype`.
  case col.dtype
  of f32:
    result = len(col.f32s)
  of f64:
    result = len(col.f64s)
  of i32:
    result = len(col.i32s)
  of i64:
    result = len(col.i64s)
  of str:
    result = len(col.strs)
proc initCsvParams*(
  sep = ',',
  quote = '\"',
  escape = '\x00',
  skipInitialSpace = false,
  hasHeader = false,
): CsvParams =
  ## Builds a `CsvParams` value from the given options. With no arguments
  ## it describes a comma-separated file that uses `"` as quote character,
  ## has no escape character, keeps leading spaces and has no header row.
  result.sep = sep
  result.quote = quote
  result.escape = escape
  result.skipInitialSpace = skipInitialSpace
  result.hasHeader = hasHeader
proc initDFCol*(dtype: static DType, size = 0, name = ""): DFCol =
  ## Creates a column tagged with `dtype` whose active sequence holds
  ## `size` default-initialised elements. `name` becomes the column name.
  ## `dtype` must be known at compile time, so the branch is picked with
  ## `when`.
  when dtype == f32:
    result = DFCol(dtype: f32, name: name, f32s: newSeq[float32](size))
  elif dtype == f64:
    result = DFCol(dtype: f64, name: name, f64s: newSeq[float64](size))
  elif dtype == i32:
    result = DFCol(dtype: i32, name: name, i32s: newSeq[int32](size))
  elif dtype == i64:
    result = DFCol(dtype: i64, name: name, i64s: newSeq[int64](size))
  elif dtype == str:
    result = DFCol(dtype: str, name: name, strs: newSeq[string](size))
proc parseDType(s: string): DType =
  ## Guesses the narrowest `DType` able to represent the text `s`.
  ##
  ## * Parses as an integer: `i32` when it fits in int32, else `i64`.
  ## * Otherwise parses as a float: `f32` when its magnitude fits the
  ##   finite float32 range (or it is +/-infinity), else `f64`. NaN is
  ##   reported as `f64`.
  ## * Otherwise: `str`.
  # Largest finite float32 magnitude. `low(float32)`/`high(float32)` are
  # -Inf/+Inf, so they cannot be used as range bounds here.
  const maxFiniteF32 = 3.4028234663852886e38
  try:
    let i = parseBiggestInt(s)
    result = if i >= low(int32) and i <= high(int32): i32 else: i64
  except ValueError:
    try:
      let f = parseFloat(s)
      let magnitude = abs(f)
      # NaN fails both comparisons and therefore falls through to f64.
      result =
        if magnitude == Inf or magnitude <= maxFiniteF32: f32
        else: f64
    except ValueError:
      result = str
proc readCsv*(filename: string, params: CsvParams): DF =
  ## Loads the CSV file `filename` into a `DF`, using the separator, quote,
  ## escape and whitespace settings from `params`.
  ##
  ## Every column is stored as strings (`str`). For each cell the type
  ## guessed by `parseDType` is tallied in the column's `dtypeCounts`.
  ## When `params.hasHeader` is set, the first row supplies the column
  ## names, and `DFError` is raised if that row cannot be read. Without a
  ## header the columns are created unnamed from the first data row.
  var csv: CsvParser
  csv.open(
    filename,
    params.sep,
    params.quote,
    params.escape,
    params.skipInitialSpace)
  defer: csv.close()

  if params.hasHeader:
    dfAssert(csv.readRow(), "Failed to read header row.")
    for title in csv.row:
      result.cols.add(initDFCol(str, name = title))

  # The current column count is handed to `readRow`; it is 0 until the
  # columns have been created.
  while csv.readRow(result.cols.len):
    if unlikely(result.cols.len == 0):
      # No header was read: create one unnamed column per field.
      for _ in 0 ..< csv.row.len:
        result.cols.add(initDFCol(str))
    for idx, cell in csv.row:
      let guessed = parseDType(cell)
      inc result.cols[idx].dtypeCounts[guessed]
      result.cols[idx].strs.add(cell)
proc names*(df: DF): seq[string] =
  ## Names of all columns of `df`, in column order.
  result = newSeqOfCap[string](df.cols.len)
  for idx in 0 ..< df.cols.len:
    result.add(df.cols[idx].name)
proc shape*(df: DF): (int, int) =
  ## Returns `(rows, columns)`. The row count is the length of the first
  ## column; a frame without columns yields `(0, 0)`.
  let nCols = df.cols.len
  if nCols > 0:
    result = (df.cols[0].len, nCols)
  else:
    result = (0, 0)
proc dtypeCounts*(df: DF): seq[array[DType, int]] =
  ## Per-column `dtypeCounts` arrays of `df`, in column order.
  result = newSeqOfCap[array[DType, int]](df.cols.len)
  for idx in 0 ..< df.cols.len:
    result.add(df.cols[idx].dtypeCounts)
- # proc filter(row: seq[string], ignoreCols: openarray[int]): seq[string] =
- # for i in 0..<row.len:
- # if i notin ignoreCols:
- # result.add(row[i])
- # proc readCsvInfo*(
- # filename: string,
- # params: CsvParams,
- # ignoreCols: openarray[int] = []
- # ): CsvInfo =
- # initCsv(filename, params, false, ignoreCols).info
- # proc parseAs(str: string, T: typedesc[SomeFloat]): T = parseFloat(str)
- # proc parseAs(str: string, T: typedesc[SomeInteger]): T = parseInt(str)
- # proc parseAs(str: string, T: typedesc[string]): T = str
- # proc readCsv*[T](
- # filename: string,
- # params: CsvParams,
- # ignoreCols: openarray[int] = []
- # ): SimpleMatrix[T] =
- # let csv = initCsv(filename, params, true, ignoreCols)
- # result = newSimpleMatrix[T](csv.info.rows, csv.info.columns)
- # for row in 0..<csv.info.rows:
- # for col in 0..<csv.info.columns:
- # result[row, col] = csv.rows[row][col].parseAs(T)
- # proc readCsvCol*[T](
- # filename: string,
- # params: CsvParams,
- # col: int
- # ): seq[T] =
- # var parser = openScopedParser(filename, params)
- # if params.hasHeader: doAssert(parser.readRow(), "Csv header read failed.")
- # while parser.readRow(): result.add(parser.row[col].parseAs(T))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement