Can't write a single row from a RowSet to file
Posted: Mon Mar 24, 2014 9:45 am
I'm trying to download a bunch of files, remove some lines from each file, and save them all in one file.
The code below works: it writes each line by echoing it through bash and /bin/echo, but it is extremely slow, on the order of one second per line. I tried using the writeFixedWidth task (disabled in the code below) instead, but it didn't work as expected. It consumed the entire RowSet on the first iteration, so my line-number check never took effect, and the lines I wanted to exclude were included in the file.
Is this a bug or a feature? How can I do this within Director?
I'm using Director version 4.1.1.
The code below works: it writes each line by echoing it through bash and /bin/echo, but it is extremely slow, on the order of one second per line. I tried using the writeFixedWidth task (disabled in the code below) instead, but it didn't work as expected. It consumed the entire RowSet on the first iteration, so my line-number check never took effect, and the lines I wanted to exclude were included in the file.
Is this a bug or a feature? How can I do this within Director?
I'm using Director version 4.1.1.
Code: Select all
<project name="Geosat solar F10.7 flux" mainModule="Main" version="2.0">
  <description>Download Geosat F10.7 solar flux data</description>
  <module name="Main">
    <!--
      Overview of this module:
        1. Fetch the yearly flux tables over FTP into /tmp.
        2. Move any existing /tmp/merged.txt out of the way.
        3. For each year, read the matching file into a RowSet, walk its rows,
           skip the unwanted line numbers and append the rest to /tmp/merged.txt.
    -->

    <!-- Step 1: download every file matching the two wildcard patterns into /tmp;
         the downloaded file list is stored in downloaded_files. -->
    <ftp label="FTP to NGDC" resourceId="ftp.ngdc.noaa.gov" version="1.0" disabled="false">
      <get label="Get files" destinationDir="/tmp" whenFileExists="overwrite" destinationFilesVariable="downloaded_files">
        <fileset dir="/STP/space-weather/solar-data/solar-features/solar-radio/noontime-flux/penticton/penticton_observed/tables/">
          <wildcardFilter>
            <include pattern="drao_noontime-flux-observed_199?.txt" caseSensitive="false" />
            <include pattern="drao_noontime-flux-observed_20*.txt" caseSensitive="false" />
          </wildcardFilter>
        </fileset>
      </get>
    </ftp>

    <!-- Debug aid (currently disabled): prints the list of downloaded files. -->
    <print label="print ${downloaded_files}" version="1.0" disabled="true">
      <![CDATA[Downloaded:
${downloaded_files}]]>
    </print>

    <!-- Step 2: rename a previous merged file, but only when it exists.
         The path inside FileInfo(...) is single-quoted: a double quote here would
         terminate the surrounding attribute value and make the document ill-formed. -->
    <rename label="Move old merged file out of the way" inputFile="/tmp/merged.txt" newName="merged 2.txt" whenFileExists="rename" version="1.0" executeOnlyIf="${FileInfo('/tmp/merged.txt'):exists}" />

    <!-- Step 3: iterate over the years, exposing the current value as ${year}.
         NOTE(review): the FTP step also downloads the 20* files, but endIndex is 2001;
         confirm whether later years are meant to be merged as well. -->
    <forLoop label="Loop over years" beginIndex="1992" endIndex="2001" step="1" currentIndexVariable="year" disabled="false">
      <!-- Read the file for the current year into the RowSet input_file;
           the processed file name(s) are stored in filename. -->
      <readFlatFile label="Read a F10.7 file" outputRowSetVariable="input_file" recordDelimiter="LF" processedInputFilesVariable="filename" version="1.0">
        <fileset dir="/tmp">
          <wildcardFilter>
            <include pattern="*${year}.txt" />
          </wildcardFilter>
        </fileset>
      </readFlatFile>
      <print label="Print processing file" version="1.0">
        <![CDATA[Processing ${filename}]]>
      </print>

      <!-- Walk the RowSet: ${line} is the current item, ${lineno} the iteration counter. -->
      <forEachLoop label="Loop over file lines" itemsVariable="${input_file}" currentItemVariable="line" currentIterationVariable="lineno">
        <print label="print lineno" version="1.0">
          <![CDATA[lineno ${lineno}]]>
        </print>

        <!-- Reset the flag on every iteration; the two if tasks below emulate if/else. -->
        <setVariable label="line_deleted = False" name="line_deleted" value="False" version="2.0" />

        <!-- Lines 2 to 5 and everything after line 41 are excluded from the merged file. -->
        <if label="If line number is one to be deleted" condition="${lineno == 2 or lineno == 3 or lineno == 4 or lineno == 5 or lineno > 41}">
          <print label="print deteted line number" version="1.0">
            <![CDATA[deleted line ${lineno}]]>
          </print>
          <setVariable label="line_deleted = True" name="line_deleted" value="True" version="2.0" />
        </if>

        <!-- "else" branch: runs only when the line was not flagged above. -->
        <if label="else" condition="${line_deleted == False}">
          <setVariable label="set linetext" name="linetext" value="${line[1]}" version="2.0" disabled="true" />
          <print label="print line" version="1.0" disabled="false">
            <![CDATA[using ${lineno}, ${line[1]}
line: ${line}]]>
          </print>

          <!-- Disabled alternative writer. It is handed ${line} as its input RowSet;
               per the author's report it wrote the whole RowSet on the first iteration
               rather than the single current row, so it stays disabled. -->
          <writeFixedWidth label="Write to merged file" inputRowSetVariable="${line}" outputFile="/tmp/merged.txt" whenFileExists="append" includeHeadings="false" recordDelimiter="LF" version="1.0" disabled="true" />

          <!-- Active writer: one bash process per kept line, appending to /tmp/merged.txt.
               NOTE(review): ${line[1]} is substituted inside a single-quoted shell string,
               so a data line containing a single quote would break (or alter) the command.
               Confirm the input can never contain one, or pass the values to bash as
               positional arguments instead. -->
          <exec label="bash: echo to merged file" executable="/bin/bash" version="1.0">
            <arg value="-c" />
            <arg value="/bin/echo 'lineno ${lineno}, ${line[1]}' >> /tmp/merged.txt" />
          </exec>
        </if>
      </forEachLoop>
    </forLoop>
  </module>
</project>