Searching a CSV File Using Grep

0 votes
asked Mar 3, 2010 by david

Lets say I have a csv file like this:

a,b1,12,
a,b1,42,
d,e1,12,
r,12,33,

I want to use grep to return only only the rows where the third column = 12. So it would return:

a,b1,12,
d,e1,12,

but not:

r,12,33,

Any ideas for a regular expression that will allow me to do this?

5 Answers

0 votes
answered Jan 3, 2010 by dennis-williamson

Here's a variation:

egrep "^([^,]+,){2}12," file.csv

The advantage is that you can select the field simply by changing the number enclosed in curly braces without having to add or subtract literal copies of the pattern manually.

0 votes
answered Jan 4, 2010 by ghostdog74

when you have csv files, where you have distinct delimiters such as commas, use the splitting on field/delimiters approach, not regular expression. Tools to break strings up like awk, Perl/Python does the job easily for you (Perl/Python has support for csv modules for more complex csv parsing)

Perl,

$ perl -F/,/ -alne  'print if $F[2]==12;' file
a,b1,12,
d,e1,12,

$ awk -F"," '$3==12' file
a,b1,12,
d,e1,12,

or with just the shell

while IFS="," read a b c d
do
    case "$c" in
        12) echo "$a,$b,$c,$d"
    esac
done <"file"
0 votes
answered Mar 3, 2010 by vivin-paliath
grep "^[^,]\+,[^,]\+,12," file.csv
0 votes
answered Mar 3, 2010 by glenn-jackman

I'd jump straight to awk to test the value exactly

awk -F, '$3 == 12' file.csv

This, and any regexp-based solution, assumes that the values of the first two fields do not contain commas

0 votes
answered Sep 15, 2017 by harald-rudell

Linux tools cannot practically process csv, because quoted fields can contain newline characters according to rfc 1480 Most dedicated utilities are garbage for various reasons.

Here’s a Node.js v7.10+ single-file executable that “just works” and produces converted json objects, one per line. Should run Linux macOS Windows

Usage for a file with header line:

cat infinite.csv | csv1480json --header
{"some header": "field value"}

Without header line:

echo abc | csv1480json
{1: "abc"}

The grep becomes:

grep '3: "12"'

On the irect text you can do

Paste this as csv1480json accessible via your PATH and give executable permissions:

#!/usr/bin/env node
/******/ (function(modules) { // webpackBootstrap
/******/    // The module cache
/******/    var installedModules = {};
/******/
/******/    // The require function
/******/    function __webpack_require__(moduleId) {
/******/
/******/        // Check if module is in cache
/******/        if(installedModules[moduleId]) {
/******/            return installedModules[moduleId].exports;
/******/        }
/******/        // Create a new module (and put it into the cache)
/******/        var module = installedModules[moduleId] = {
/******/            i: moduleId,
/******/            l: false,
/******/            exports: {}
/******/        };
/******/
/******/        // Execute the module function
/******/        modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
/******/
/******/        // Flag the module as loaded
/******/        module.l = true;
/******/
/******/        // Return the exports of the module
/******/        return module.exports;
/******/    }
/******/
/******/
/******/    // expose the modules object (__webpack_modules__)
/******/    __webpack_require__.m = modules;
/******/
/******/    // expose the module cache
/******/    __webpack_require__.c = installedModules;
/******/
/******/    // define getter function for harmony exports
/******/    __webpack_require__.d = function(exports, name, getter) {
/******/        if(!__webpack_require__.o(exports, name)) {
/******/            Object.defineProperty(exports, name, {
/******/                configurable: false,
/******/                enumerable: true,
/******/                get: getter
/******/            });
/******/        }
/******/    };
/******/
/******/    // getDefaultExport function for compatibility with non-harmony modules
/******/    __webpack_require__.n = function(module) {
/******/        var getter = module && module.__esModule ?
/******/            function getDefault() { return module['default']; } :
/******/            function getModuleExports() { return module; };
/******/        __webpack_require__.d(getter, 'a', getter);
/******/        return getter;
/******/    };
/******/
/******/    // Object.prototype.hasOwnProperty.call
/******/    __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };
/******/
/******/    // __webpack_public_path__
/******/    __webpack_require__.p = "";
/******/
/******/    // Load entry module and return exports
/******/    return __webpack_require__(__webpack_require__.s = 0);
/******/ })
/************************************************************************/
/******/ ([
/* 0 */
/***/ (function(module, exports, __webpack_require__) {

"use strict";


var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; };

var _CsvJsonConverter = __webpack_require__(1);

var _CsvJsonConverter2 = _interopRequireDefault(_CsvJsonConverter);

function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

launch({ fn: _CsvJsonConverter2.default, getOptions, errorHandler }).catch(errorHandler);

function getOptions() {
  const { argv } = process;
  console.log('getOptions argv', argv);
  const useHeader = argv[2] === '--header';
  if (argv.length !== (useHeader ? 3 : 2)) throw new Error('usage: csv1480json [--header]');
  return { readStream: process.stdin, writeStream: process.stdout, useHeader };
}

async function launch({ fn, getOptions, errorHandler }) {
  process.on('uncaughtException', errorHandler).on('unhandledRejection', errorHandler);
  new fn(_extends({}, getOptions(), { errorHandler }));
}

function errorHandler(e) {
  console.error(e instanceof Error ? e /*TODO .message*/ : `errorHandler value: ${typeof e} ${e}`);
  process.exit(1);
}

/***/ }),
/* 1 */
/***/ (function(module, exports, __webpack_require__) {

"use strict";


Object.defineProperty(exports, "__esModule", {
  value: true
});

var _Pipeline = __webpack_require__(2);

var _Pipeline2 = _interopRequireDefault(_Pipeline);

function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

// getField result
const FIELD_EOF = 1; // end of file
const FIELD_NONE = 2; // data for complete field not seen yet
const FIELD_RECORD = 3; // got a complete record

const separators = Array.from(',\r\n');

class CsvJsonConverter extends _Pipeline2.default {
  constructor(o) {
    super(o || false);

    this.addData = string => this.csv += string;

    this.isField = s => typeof s === 'string';

    const { useHeader } = o || false;
    this.useHeader = !!useHeader;
    this.csv = '';
    this.recordNo = 1;
    console.log(this.useHeader, Object.keys(o || false));
  }

  getOutput(isEnd) {
    if (isEnd) this.isEnd = true;
    if (this.useHeader && !this.headers) if (!this.getHeader()) return;
    let output = '';
    for (let record; record = this.getRecord(); output += record + '\n');
    return output || undefined;
  }

  getRecord() {
    const fields = this.getFieldList();
    if (fields) {
      // got a record
      const count = fields.length;
      const { fieldCount, recordNo, useHeader, headers } = this;
      if (!fieldCount) this.fieldCount = count;else if (count !== fieldCount) throw new Error(`Record ${recordNo} bad field count: ${count} expected ${fieldCount}`);
      this.recordNo++;
      return `{${fields.map((v, index) => `${useHeader ? headers[index] : index + 1}: ${JSON.stringify(v)}`).join(', ')}}`;
    } else return false;
  }

  getHeader() {
    const list = this.getFieldList();
    if (list) {
      this.headers = list.map(v => JSON.stringify(v));
      this.fieldCount = list.length;
    }
  }

  getFieldList() {
    // array of string or false
    let fields = this.fields || (this.fields = []);
    let field;
    while (this.isField(field = this.getField())) fields.push(field);
    console.log('getFieldList end:', field, fields);
    if (field === FIELD_RECORD) {
      this.fields = null;
      return fields;
    } else return false; // need to wait for more data or end of records
  }

  getField() {
    // string or FIELD_*
    const { isEnd, recordNo } = this;
    const fields = this.fields.length;
    let { csv } = this;
    let csvCh = csv[0];

    if (csvCh === '\r' || csvCh === '\n') {
      // skip the end of line terminating a previous record
      if (csv.length < 2 && !isEnd) return FIELD_NONE; // must have two characters to find \r\n
      const chs = csv.substring(0, 2) === '\r\n' ? 2 : 1;
      this.csv = csv = csv.substring(chs);
      return FIELD_RECORD; // we have a complete record
    }

    if (!csv && isEnd) return fields ? FIELD_RECORD : FIELD_EOF;

    const m = `Record ${recordNo} field ${fields + 1}`;

    if (fields) if (csvCh === ',') csvCh = (this.csv = csv = csv.substring(1))[0];else throw new Error(`${m} missing field-separating comma`); // TODO i
Welcome to Q&A, where you can ask questions and receive answers from other members of the community.
Website Online Counter

...