Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
ctools/csort.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
177 lines (149 sloc)
5.18 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import print_function | |
#----------------------------------------------------------------------- | |
# Imports | |
#----------------------------------------------------------------------- | |
import sys | |
import getopt | |
#----------------------------------------------------------------------- | |
# Defaults | |
#----------------------------------------------------------------------- | |
# Default separator character. Not referenced by any code in the visible
# part of this file.
DEF_SEPARATOR = "/"

# Process-wide verbosity switch; main() sets it to True when -v is given.
VERBOSE = False
#----------------------------------------------------------------------- | |
# Functions | |
#----------------------------------------------------------------------- | |
def Usage(msg=None):
    """Print the command-line help to standard out, then terminate.

    Args:
        msg: Optional extra message printed after the help text
            (skipped when empty or None).

    Raises:
        SystemExit: Always, once the text has been printed.
    """
    # The option list mirrors the getopt spec used by main(): 'rcnk:v'.
    print("""
csort -rcnv -k KEYSTR FILE1 [ FILE2 .. ]
    Sort clause file(s) and print to standard out.
    A FILE of - reads standard input.
    -k KEYSTR - Sort clauses relative to one another
                based on the value of KEYSTR
    -c        - Sort fields within a clause
    -r        - Sort in reverse order.
    -n        - With -k, compare the values as numbers
                (non-numeric values count as 0)
    -v        - Verbose; with -n, report non-numeric values
    Either -c or -k key must be specified. Both is also OK
""")
    if msg:
        print(msg)
    raise SystemExit
def print_verbose(verbose, msg):
    """Print msg to standard out behind a "VERBOSE: " tag.

    Args:
        verbose: When falsy nothing is printed.
        msg: The object to print after the tag.
    """
    if not verbose:
        return
    print("VERBOSE: ", msg)
def print_error(msg):
    """Report a fatal error and terminate the program.

    The message is written to standard error so that it cannot end up
    inside the sorted output on standard out, and the process exits with
    status 1 so that calling scripts can detect the failure.

    Args:
        msg: The object to print after the "ERROR: " tag.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("ERROR: ", msg, file=sys.stderr)
    sys.exit(1)
# Read clauses
# - Clauses in input file must be separated by one or more blank lines
# - Yields one clause at a time.
# - Each clause is returned as a list of (key, line number, value) tuples,
#   one per "KEY: VALUE" line; line numbers start at 1
def read_clauses(files, lower_case=False):
    """Yield the clauses found in the given files, one clause at a time.

    A clause is a run of lines terminated by a blank line or by the end of
    the file. Only lines containing ':' contribute to a clause; every other
    non-blank line is skipped (but still counted for line numbering).

    Args:
        files: Iterable of file names. The name "-" means standard input.
        lower_case: When true, each line (key and value alike) is
            lower-cased before it is split.

    Yields:
        A non-empty list of (key, line_no, value) tuples. key and value are
        the text before and after the first ':' with surrounding whitespace
        removed. line_no is 1-based and keeps counting across files; it is
        not reset when the next file starts.
    """
    line_no = 0
    for fname in files:
        fin = sys.stdin if fname == "-" else open(fname, "r")
        try:
            clause = []
            # Iterate the handle directly instead of slurping the whole
            # file with readlines().
            for line in fin:
                line_no += 1
                line = line.strip()
                if not line:
                    # A blank line ends the clause collected so far.
                    if clause:
                        yield clause
                    clause = []
                    continue
                if ':' not in line:
                    continue
                if lower_case:
                    line = line.lower()
                key, value = line.split(":", 1)
                # Strip both parts: otherwise "KEY: VALUE" round-trips as
                # "KEY:  VALUE" when printed, and "KEY : VALUE" would never
                # match a sort key of "KEY".
                clause.append((key.strip(), line_no, value.strip()))
            # The last clause of a file needs no trailing blank line.
            if clause:
                yield clause
        finally:
            # Close files we opened even if the consumer stops early;
            # standard input is left open.
            if fin is not sys.stdin:
                fin.close()
# Converts clause as returned from read_clauses() | |
# to a dictionary | |
# - Key,value are taken from lines like KEY: VALUE
# - A given key can appear multiple times in a clause
# - Each dictionary value is a tuple: the record number of the key's first
#   occurrence, followed by that key's value strings
def clause2dict(clause):
    """Convert one clause into a dictionary keyed by field name.

    Args:
        clause: List of records. Each record is either a
            (key, line_no, value) triple, which is the shape read_clauses()
            yields, or a legacy (line_no, line_text) pair whose text is
            split at the first ':'. Pairs whose text has no ':' are ignored.

    Returns:
        A dict mapping each stripped key to a tuple. The first element is
        the index within the clause of the record where the key first
        appeared; the remaining elements are the stripped value strings for
        that key, in order of appearance.
    """
    cdict = {}
    for recno, record in enumerate(clause):
        if len(record) == 3:
            # Already split by read_clauses(); the line number is not needed.
            key, _line_no, value = record
        else:
            _line_no, linestr = record
            # If no ':', ignore the line
            if ':' not in linestr:
                continue
            key, value = linestr.split(":", 1)
        # The list starts with the record number of the key's first
        # occurrence; every value for the key is appended after it.
        cdict.setdefault(key.strip(), [recno]).append(value.strip())
    # Convert value lists to value tuples
    return {k: tuple(v) for k, v in cdict.items()}
def sort_clauses(keystr, clauses, reverse=False, numeric=False):
    """Return the clauses ordered by the value each one holds for keystr.

    Every clause must contain exactly one field whose key equals keystr;
    otherwise print_error() is called, which terminates the program.

    Args:
        keystr: Key whose value decides the order.
        clauses: List of clauses, each a non-empty list of
            (key, line_no, value) tuples.
        reverse: Sort in descending order when true.
        numeric: Compare the values as floats instead of strings. A value
            that cannot be converted counts as 0.0 and is reported through
            print_verbose() when the module-level VERBOSE flag is set.

    Returns:
        A new list holding the same clause objects in sorted order. Clauses
        with equal values are ordered by their original position (reversed
        as well when reverse is true).
    """
    value2indices = []
    for clauseno, clause in enumerate(clauses):
        # Line numbers stored in a clause are already 1-based, so they are
        # reported as-is.
        first_lineno = clause[0][1]
        # Find value(s) for key in this clause
        matches = [(lineno, value) for k, lineno, value in clause if k == keystr]
        if not matches:
            print_error("Clause %d on line %d has no intance of key %s" %
                        (clauseno + 1, first_lineno, keystr))
        if len(matches) >= 2:
            print_error("Clause %d on line %d has multiple values for key %s" %
                        (clauseno + 1, first_lineno, keystr))
        single_lineno, single_value = matches[-1]
        if numeric:
            try:
                single_value = float(single_value)
            except (TypeError, ValueError):
                print_verbose(VERBOSE, "Invalid numeric value on line %d, clause %d. Key,value are (%s,%s). Will user value of 0." %
                              (single_lineno, clauseno + 1, keystr, single_value))
                single_value = float(0)
        # Pair the value with the clause index: ties then fall back to the
        # index, and the clause lists themselves are never compared.
        value2indices.append((single_value, clauseno))
    value2indices.sort(reverse=reverse)
    return [clauses[clauseno] for _value, clauseno in value2indices]
# Print clauses in pretty format
# - Each field is printed on its own "KEY: VALUE" line; clauses are
#   separated by a blank line
# - Optionally sort each clause by keys alphabetically
def print_clauses(clauses, sort_within_clause=False, reverse=False):
    """Write the clauses to standard out, separated by one blank line.

    Each field is printed as "KEY: VALUE"; the stored line number is not
    printed.

    Args:
        clauses: Iterable of clauses, each a list of
            (key, line_no, value) tuples.
        sort_within_clause: When true, the fields of each clause are printed
            in sorted tuple order, i.e. by key and then by original line
            number.
        reverse: Accepted but not used by this function.
    """
    is_first = True
    for clause in clauses:
        # Blank separator line before every clause except the first.
        if not is_first:
            print()
        is_first = False
        fields = sorted(clause) if sort_within_clause else clause
        for key, _lineno, text in fields:
            print("%s: %s" % (key, text))
#----------------------------------------------------------------------- | |
# Main | |
#----------------------------------------------------------------------- | |
def main():
    """Command-line entry point: parse options, read, sort and print clauses.

    Options are taken from sys.argv: -k KEYSTR, -c, -r, -n and -v. At least
    one file name and at least one of -k / -c are required; otherwise, or
    when an option cannot be parsed, the usage text is shown and the program
    terminates. Sets the module-level VERBOSE flag from -v.
    """
    global VERBOSE
    # Read options
    try:
        opts, fnames = getopt.getopt(sys.argv[1:], 'rcnk:v')
    except getopt.GetoptError as err:
        # Unknown option or missing -k argument: show the help text with
        # the reason instead of a traceback.
        Usage(str(err))
    if len(fnames) < 1:
        Usage()
    opts = dict(opts)
    reverse = '-r' in opts
    numeric = '-n' in opts
    VERBOSE = '-v' in opts
    if '-k' not in opts and '-c' not in opts:
        Usage()
    # Read clauses
    clauses = list(read_clauses(fnames))
    # Order clauses according to the value
    # associated with opts['-k']
    if '-k' in opts:
        clauses = sort_clauses(opts['-k'], clauses, reverse=reverse, numeric=numeric)
    # Print clauses
    print_clauses(clauses, '-c' in opts, reverse)


# Run only when executed as a script, so that importing this module does
# not parse sys.argv or produce output.
if __name__ == "__main__":
    main()