"""Convert a CSV export of specimen records into tab-separated text.

The input should be CSV and the output a txt file; the resulting
tab-separated text can be imported and saved in Excel.

Runs unchanged on Python 2 and Python 3.
"""
import csv
import sys

# Header names of the columns that are read, listed in the order of the
# Specimen_Record constructor arguments
# (CatNo, ScientName, InstCode, Lat, Long, Country, Date).
_RECORD_COLUMNS = (
    "catalogNumber",
    "species",
    "institution",
    "lat",
    "lon",
    "country",
    "earliestDateCollected",
)


# Here's where classes and functions are defined
class Specimen_Record(object):
    """One data row of the input file; fields are stored exactly as passed in."""

    def __init__(self, CatNo, ScientName, InstCode, Lat, Long, Country, Date):
        # NOTE(review): the original carried the reminder "Check to make sure
        # species is the same" here, but no such check is implemented.
        self.CatNo = CatNo
        self.ScientName = ScientName
        self.InstCode = InstCode
        self.Lat = Lat
        self.Long = Long
        self.Country = Country
        self.Date = Date

    def __str__(self):
        """Return name, catalogue number, longitude and latitude concatenated
        with no separator between them."""
        return self.ScientName + self.CatNo + self.Long + self.Lat


def _open_csv(path):
    """Open *path* in the mode the csv module needs on the running Python."""
    if sys.version_info[0] >= 3:
        # newline="" leaves line-ending handling to the csv module.
        return open(path, newline="")
    # Python 2: universal-newline mode, as the original script used
    # ("U" was removed from open() in Python 3.11).
    return open(path, "rU")


def _parse_records(handle):
    """Build the de-duplicated list of Specimen_Record from an open CSV stream."""
    reader = csv.reader(handle, delimiter=",")
    header = next(reader, None)
    if header is None:
        # Completely empty input: nothing to report.
        return []
    # list.index raises ValueError when a required column is missing.
    columns = [header.index(name) for name in _RECORD_COLUMNS]

    seen = set()  # catalogue numbers already emitted; O(1) membership test
    records = []
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; skip it instead of
            # failing with IndexError.
            continue
        fields = [row[index] for index in columns]
        cat_no = fields[0]
        if cat_no in seen:
            continue  # keep only the first row for each catalogue number
        seen.add(cat_no)
        records.append(Specimen_Record(*fields))
    return records


# Read input file
def read_file(filename):
    """Read specimen records from CSV, keeping one record per catalogue number.

    Args:
        filename: path of the CSV file, or an already-open file-like object
            (anything with a ``read`` attribute, such as ``sys.stdin``).
            A file-like object is read but not closed.

    Returns:
        A list of Specimen_Record in file order. When several rows share a
        ``catalogNumber`` only the first is kept. An input with no header
        row yields an empty list.

    Raises:
        ValueError: a required column name is missing from the header row.
        IndexError: a non-blank data row is too short to hold a required
            column.
    """
    if hasattr(filename, "read"):
        return _parse_records(filename)
    with _open_csv(str(filename)) as handle:
        return _parse_records(handle)


def _write_records(records, output_stream):
    """Write each sufficiently complete record to *output_stream* as one
    tab-separated line."""
    for record in records:
        # Same thresholds as the original nested checks. Presumably these
        # filter out blank or placeholder values -- TODO confirm.
        if (
            len(record.ScientName) <= 2
            or len(record.Long) <= 1
            or len(record.Date) <= 2
        ):
            continue
        fields = [
            record.InstCode,
            record.ScientName,
            record.Long,
            record.Lat,
            record.Date,
        ]
        output_stream.write("%s\n" % "\t".join(fields))


# Create and write new file, output
def write_file(records):
    """Write *records* as tab-separated lines.

    The destination is taken from the command line: when exactly two
    arguments were given (``len(sys.argv) == 3``) the second one names the
    output file, otherwise the lines go to standard output.

    Args:
        records: iterable of objects with ``InstCode``, ``ScientName``,
            ``Long``, ``Lat`` and ``Date`` string attributes.
    """
    if len(sys.argv) == 3:
        # The context manager guarantees the output is flushed and closed.
        with open(sys.argv[2], "w") as output_stream:
            _write_records(records, output_stream)
    else:
        _write_records(records, sys.stdout)


# Here's where the action is
def main():
    """Command-line entry point: validate arguments, read, report, write."""
    program_name = sys.argv[0]
    if len(sys.argv) > 3 or len(sys.argv) < 2:
        sys.exit(program_name + ": Expecting two arguments: [filename], [outputfile]")
    # filename should be csv, outputfile should be txt
    if len(sys.argv) == 3:
        filename = sys.argv[1]
    else:
        # NOTE(review): with a single argument the original selects stdin
        # and ignores the argument itself; confirm whether argv[1] was
        # meant to be the input path in that case.
        filename = sys.stdin
    records = read_file(filename)
    print(len(records))
    write_file(records)
    # Now you have a txt file that can be imported and saved in excel


if __name__ == "__main__":
    main()