001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019
020import java.text.ParseException;
021import java.util.List;
022
023import org.apache.commons.net.ftp.FTPClientConfig;
024import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation of FTPFileEntryParser and FTPFileListParser for IBM zOS/MVS
028 * Systems.
029 * 
030 * @author <a href="henrik.sorensen@balcab.ch">Henrik Sorensen</a>
031 * @author <a href="jnadler@srcginc.com">Jeff Nadler</a>
032 * @author <a href="wnoto@openfinance.com">William Noto</a>
033 * 
034 * @version $Id: MVSFTPEntryParser.java 658520 2008-05-21 01:14:11Z sebb $
035 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for
036 *      usage instructions)
037 */
038public class MVSFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
039
040    static final int UNKNOWN_LIST_TYPE = -1;
041    static final int FILE_LIST_TYPE = 0;
042    static final int MEMBER_LIST_TYPE = 1;
043    static final int UNIX_LIST_TYPE = 2;
044    static final int JES_LEVEL_1_LIST_TYPE = 3;
045    static final int JES_LEVEL_2_LIST_TYPE = 4;
046
047    private int isType = UNKNOWN_LIST_TYPE;
048
049    /**
050     * Fallback parser for Unix-style listings
051     */
052    private UnixFTPEntryParser unixFTPEntryParser;
053
054    /**
055     * Dates are ignored for file lists, but are used for member lists where
056     * possible
057     */
058    static final String DEFAULT_DATE_FORMAT = "yyyy/MM/dd HH:mm"; // 2001/09/18
059                                                                    // 13:52
060
061    /**
062     * Matches these entries: Volume Unit Referred Ext Used Recfm Lrecl BlkSz
063     * Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80 80 PS MDI.OKL.WORK
064     * 
065     */
066    static final String FILE_LIST_REGEX = "\\S+\\s+" + // volume
067                                                                // ignored
068            "\\S+\\s+" + // unit - ignored
069            "\\S+\\s+" + // access date - ignored
070            "\\S+\\s+" + // extents -ignored
071            "\\S+\\s+" + // used - ignored
072            "[FV]\\S*\\s+" + // recfm - must start with F or V
073            "\\S+\\s+" + // logical record length -ignored
074            "\\S+\\s+" + // block size - ignored
075            "(PS|PO|PO-E)\\s+" + // Dataset organisation. Many exist
076            // but only support: PS, PO, PO-E
077            "(\\S+)\\s*"; // Dataset Name (file name)
078
079    /**
080     * Matches these entries: Name VV.MM Created Changed Size Init Mod Id
081     * TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11 0 KIL001
082     */
083    static final String MEMBER_LIST_REGEX = "(\\S+)\\s+" + // name
084            "\\S+\\s+" + // version, modification (ignored)
085            "\\S+\\s+" + // create date (ignored)
086            "(\\S+)\\s+" + // modification date
087            "(\\S+)\\s+" + // modification time
088            "\\S+\\s+" + // size in lines (ignored)
089            "\\S+\\s+" + // size in lines at creation(ignored)
090            "\\S+\\s+" + // lines modified (ignored)
091            "\\S+\\s*"; // id of user who modified (ignored)
092
093    /**
094     * Matches these entries, note: no header: IBMUSER1 JOB01906 OUTPUT 3 Spool
095     * Files 012345678901234567890123456789012345678901234 1 2 3 4
096     */
097    static final String JES_LEVEL_1_LIST_REGEX = "(\\S+)\\s+" + // job
098                                                                        // name
099                                                                        // ignored
100            "(\\S+)\\s+" + // job number
101            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE)
102            "(\\S+)\\s+" + // number of spool files
103            "(\\S+)\\s+" + // Text "Spool" ignored
104            "(\\S+)\\s*" // Text "Files" ignored
105    ;
106
107    /**
108     * JES INTERFACE LEVEL 2 parser Matches these entries: JOBNAME JOBID OWNER
109     * STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool files
110     * IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
111     * 012345678901234567890123456789012345678901234 1 2 3 4
112     * 012345678901234567890123456789012345678901234567890
113     */
114
115    static final String JES_LEVEL_2_LIST_REGEX = "(\\S+)\\s+" + // job
116                                                                        // name
117                                                                        // ignored
118            "(\\S+)\\s+" + // job number
119            "(\\S+)\\s+" + // owner ignored
120            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE) ignored
121            "(\\S+)\\s+" + // job class ignored
122            "(\\S+).*" // rest ignored
123    ;
124
125    /*
126     * ---------------------------------------------------------------------
127     * Very brief and incomplete description of the zOS/MVS-filesystem. (Note:
128     * "zOS" is the operating system on the mainframe, and is the new name for
129     * MVS)
130     * 
131     * The filesystem on the mainframe does not have hierarchal structure as for
132     * example the unix filesystem. For a more comprehensive description, please
133     * refer to the IBM manuals
134     * 
135     * @LINK:
136     * http://publibfp.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/dgt2d440/CONTENTS
137     * 
138     * 
139     * Dataset names =============
140     * 
141     * A dataset name consist of a number of qualifiers separated by '.', each
142     * qualifier can be at most 8 characters, and the total length of a dataset
143     * can be max 44 characters including the dots.
144     * 
145     * 
146     * Dataset organisation ====================
147     * 
148     * A dataset represents a piece of storage allocated on one or more disks.
149     * The structure of the storage is described with the field dataset
150     * organinsation (DSORG). There are a number of dataset organisations, but
151     * only two are usable for FTP transfer.
152     * 
153     * DSORG: PS: sequential, or flat file PO: partitioned dataset PO-E:
154     * extended partitioned dataset
155     * 
156     * The PS file is just a flat file, as you would find it on the unix file
157     * system.
158     * 
159     * The PO and PO-E files, can be compared to a single level directory
160     * structure. A PO file consist of a number of dataset members, or files if
161     * you will. It is possible to CD into the file, and to retrieve the
162     * individual members.
163     * 
164     * 
165     * Dataset record format =====================
166     * 
167     * The physical layout of the dataset is described on the dataset itself.
168     * There are a number of record formats (RECFM), but just a few is relavant
169     * for the FTP transfer.
170     * 
171     * Any one beginning with either F or V can safely used by FTP transfer. All
172     * others should only be used with great care, so this version will just
173     * ignore the other record formats. F means a fixed number of records per
174     * allocated storage, and V means a variable number of records.
175     * 
176     * 
177     * Other notes ===========
178     * 
179     * The file system supports automatically backup and retrieval of datasets.
180     * If a file is backed up, the ftp LIST command will return: ARCIVE Not
181     * Direct Access Device KJ.IOP998.ERROR.PL.UNITTEST
182     * 
183     * 
184     * Implementation notes ====================
185     * 
186     * Only datasets that have dsorg PS, PO or PO-E and have recfm beginning
187     * with F or V, is fully parsed.
188     * 
189     * The following fields in FTPFile is used: FTPFile.Rawlisting: Always set.
190     * FTPFile.Type: DIRECTORY_TYPE or FILE_TYPE or UNKNOWN FTPFile.Name: name
191     * FTPFile.Timestamp: change time or null
192     * 
193     * 
194     * 
195     * Additional information ======================
196     * 
197     * The MVS ftp server supports a number of features via the FTP interface.
198     * The features are controlled with the FTP command quote site filetype=<SEQ|JES|DB2>
199     * SEQ is the default and used for normal file transfer JES is used to
200     * interact with the Job Entry Subsystem (JES) similar to a job scheduler
201     * DB2 is used to interact with a DB2 subsystem
202     * 
203     * This parser supports SEQ and JES.
204     * 
205     * 
206     * 
207     * 
208     * 
209     * 
210     */
211
212    /**
213     * The sole constructor for a MVSFTPEntryParser object.
214     * 
215     */
216    public MVSFTPEntryParser() {
217        super(""); // note the regex is set in preParse.
218        super.configure(null); // configure parser with default configurations
219    }
220
221    /**
222     * Parses a line of an z/OS - MVS FTP server file listing and converts it
223     * into a usable format in the form of an <code> FTPFile </code> instance.
224     * If the file listing line doesn't describe a file, then
225     * <code> null </code> is returned. Otherwise a <code> FTPFile </code>
226     * instance representing the file is returned.
227     * 
228     * @param entry
229     *            A line of text from the file listing
230     * @return An FTPFile instance corresponding to the supplied entry
231     */
232    public FTPFile parseFTPEntry(String entry) {
233        boolean isParsed = false;
234        FTPFile f = new FTPFile();
235
236        if (isType == FILE_LIST_TYPE)
237            isParsed = parseFileList(f, entry);
238        else if (isType == MEMBER_LIST_TYPE) {
239            isParsed = parseMemberList(f, entry);
240            if (!isParsed)
241                isParsed = parseSimpleEntry(f, entry);
242        } else if (isType == UNIX_LIST_TYPE) {
243            isParsed = parseUnixList(f, entry);
244        } else if (isType == JES_LEVEL_1_LIST_TYPE) {
245            isParsed = parseJeslevel1List(f, entry);
246        } else if (isType == JES_LEVEL_2_LIST_TYPE) {
247            isParsed = parseJeslevel2List(f, entry);
248        }
249
250        if (!isParsed)
251            f = null;
252        
253        return f;
254    }
255
256    /**
257     * Parse entries representing a dataset list. Only datasets with DSORG PS or
258     * PO or PO-E and with RECFM F* or V* will be parsed.
259     * 
260     * Format of ZOS/MVS file list: 1 2 3 4 5 6 7 8 9 10 Volume Unit Referred
261     * Ext Used Recfm Lrecl BlkSz Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80
262     * 80 PS MDI.OKL.WORK ARCIVE Not Direct Access Device
263     * KJ.IOP998.ERROR.PL.UNITTEST B1N231 3390 2006/03/20 1 15 VB 256 27998 PO
264     * PLU B1N231 3390 2006/03/20 1 15 VB 256 27998 PO-E PLB
265     * 
266     * ----------------------------------- Group within Regex [1] Volume [2]
267     * Unit [3] Referred [4] Ext: number of extents [5] Used [6] Recfm: Record
268     * format [7] Lrecl: Logical record length [8] BlkSz: Block size [9] Dsorg:
269     * Dataset organisation. Many exists but only support: PS, PO, PO-E [10]
270     * Dsname: Dataset name
271     * 
272     * Note: When volume is ARCIVE, it means the dataset is stored somewhere in
273     * a tape archive. These entries is currently not supported by this parser.
274     * A null value is returned.
275     * 
276     * @param file
277     *            will be updated with Name, Type, Timestamp if parsed.
278     * @param entry zosDirectoryEntry
279     * @return true: entry was parsed, false: entry was not parsed.
280     */
281    private boolean parseFileList(FTPFile file, String entry) {
282        if (matches(entry)) {
283            file.setRawListing(entry);
284            String name = group(2);
285            String dsorg = group(1);
286            file.setName(name);
287
288            // DSORG
289            if ("PS".equals(dsorg)) {
290                file.setType(FTPFile.FILE_TYPE);
291            } 
292            else if ("PO".equals(dsorg) || "PO-E".equals(dsorg)) {
293                // regex already ruled out anything other than PO or PO-E
294                file.setType(FTPFile.DIRECTORY_TYPE);
295            } 
296            else {
297                return false;
298            }
299            
300            return true;
301        }
302        
303        return false;
304    }
305
306    /**
307     * Parse entries within a partitioned dataset.
308     * 
309     * Format of a memberlist within a PDS: 1 2 3 4 5 6 7 8 9 Name VV.MM Created
310     * Changed Size Init Mod Id TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11
311     * 0 KIL001 TBTOOL 01.12 2002/09/12 2004/11/26 19:54 51 28 0 KIL001
312     * 
313     * ------------------------------------------- [1] Name [2] VV.MM: Version .
314     * modification [3] Created: yyyy / MM / dd [4,5] Changed: yyyy / MM / dd
315     * HH:mm [6] Size: number of lines [7] Init: number of lines when first
316     * created [8] Mod: number of modified lines a last save [9] Id: User id for
317     * last update
318     * 
319     * 
320     * @param file
321     *            will be updated with Name, Type and Timestamp if parsed.
322     * @param entry zosDirectoryEntry
323     * @return true: entry was parsed, false: entry was not parsed.
324     */
325    private boolean parseMemberList(FTPFile file, String entry) {
326        if (matches(entry)) {
327            file.setRawListing(entry);
328            String name = group(1);
329            String datestr = group(2) + " " + group(3);
330            file.setName(name);
331            file.setType(FTPFile.FILE_TYPE);
332            try {
333                file.setTimestamp(super.parseTimestamp(datestr));
334            } catch (ParseException e) {
335                e.printStackTrace();
336                // just ignore parsing errors.
337                // TODO check this is ok
338                return false; // this is a parsing failure too.
339            }
340            return true;
341        }
342
343        return false;
344    }
345
346    /**
347     * Assigns the name to the first word of the entry. Only to be used from a
348     * safe context, for example from a memberlist, where the regex for some
349     * reason fails. Then just assign the name field of FTPFile.
350     * 
351     * @param file
352     * @param entry
353     * @return
354     */
355    private boolean parseSimpleEntry(FTPFile file, String entry) {
356        if (entry != null && entry.length() > 0) {
357            file.setRawListing(entry);
358            String name = entry.split(" ")[0];
359            file.setName(name);
360            file.setType(FTPFile.FILE_TYPE);
361            return true;
362        }
363        return false;
364    }
365
366    /**
367     * Parse the entry as a standard unix file. Using the UnixFTPEntryParser.
368     * 
369     * @param file
370     * @param entry
371     * @return true: entry is parsed, false: entry could not be parsed.
372     */
373    private boolean parseUnixList(FTPFile file, String entry) {
374        file = unixFTPEntryParser.parseFTPEntry(entry);
375        if (file == null)
376            return false;
377        return true;
378    }
379
380    /**
381     * Matches these entries, note: no header: [1] [2] [3] [4] [5] IBMUSER1
382     * JOB01906 OUTPUT 3 Spool Files
383     * 012345678901234567890123456789012345678901234 1 2 3 4
384     * ------------------------------------------- Group in regex [1] Job name
385     * [2] Job number [3] Job status (INPUT,ACTIVE,OUTPUT) [4] Number of sysout
386     * files [5] The string "Spool Files"
387     * 
388     * 
389     * @param file
390     *            will be updated with Name, Type and Timestamp if parsed.
391     * @param entry zosDirectoryEntry
392     * @return true: entry was parsed, false: entry was not parsed.
393     */
394    private boolean parseJeslevel1List(FTPFile file, String entry) {
395        if (matches(entry)) {
396            if (group(3).equalsIgnoreCase("OUTPUT")) {
397                file.setRawListing(entry);
398                String name = group(2); /* Job Number, used by GET */
399                file.setName(name);
400                file.setType(FTPFile.FILE_TYPE);
401                return true;
402            }
403        }
404
405        return false;
406    }
407
408    /**
409     * Matches these entries, note: no header: [1] [2] [3] [4] [5] JOBNAME JOBID
410     * OWNER STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool
411     * files IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
412     * 012345678901234567890123456789012345678901234 1 2 3 4
413     * ------------------------------------------- Group in regex [1] Job name
414     * [2] Job number [3] Owner [4] Job status (INPUT,ACTIVE,OUTPUT) [5] Job
415     * Class [6] The rest
416     * 
417     * 
418     * @param file
419     *            will be updated with Name, Type and Timestamp if parsed.
420     * @param entry zosDirectoryEntry
421     * @return true: entry was parsed, false: entry was not parsed.
422     */
423    private boolean parseJeslevel2List(FTPFile file, String entry) {
424        if (matches(entry)) {
425            if (group(4).equalsIgnoreCase("OUTPUT")) {
426                file.setRawListing(entry);
427                String name = group(2); /* Job Number, used by GET */
428                file.setName(name);
429                file.setType(FTPFile.FILE_TYPE);
430                return true;
431            }
432        }
433
434        return false;
435    }
436
437    /**
438     * preParse is called as part of the interface. Per definition is is called
439     * before the parsing takes place. Three kind of lists is recognize:
440     * z/OS-MVS File lists z/OS-MVS Member lists unix file lists
441     * @since 2.0
442     */
443    @Override
444    public List<String> preParse(List<String> orig) {
445        // simply remove the header line. Composite logic will take care of the
446        // two different types of
447        // list in short order.
448        if (orig != null && orig.size() > 0) {
449            String header = orig.get(0);
450            if (header.indexOf("Volume") >= 0 && header.indexOf("Dsname") >= 0) {
451                setType(FILE_LIST_TYPE);
452                super.setRegex(FILE_LIST_REGEX);
453            } else if (header.indexOf("Name") >= 0 && header.indexOf("Id") >= 0) {
454                setType(MEMBER_LIST_TYPE);
455                super.setRegex(MEMBER_LIST_REGEX);
456            } else if (header.indexOf("total") == 0) {
457                setType(UNIX_LIST_TYPE);
458                unixFTPEntryParser = new UnixFTPEntryParser();
459            } else if (header.indexOf("Spool Files") >= 30) {
460                setType(JES_LEVEL_1_LIST_TYPE);
461                super.setRegex(JES_LEVEL_1_LIST_REGEX);
462            } else if (header.indexOf("JOBNAME") == 0
463                    && header.indexOf("JOBID") > 8) {// header contains JOBNAME JOBID OWNER // STATUS CLASS
464                setType(JES_LEVEL_2_LIST_TYPE);
465                super.setRegex(JES_LEVEL_2_LIST_REGEX);
466            } else {
467                setType(UNKNOWN_LIST_TYPE);
468            }
469            
470            if (isType != JES_LEVEL_1_LIST_TYPE) { // remove header is necessary
471                orig.remove(0);
472            }
473        }
474        
475        return orig;
476    }
477    
478    /**
479     * Explicitly set the type of listing being processed.
480     * @param type The listing type.
481     */
482    void setType(int type) {
483        isType = type;
484    }
485
486    /* 
487     * @return
488     */
489    @Override
490    protected FTPClientConfig getDefaultConfiguration() {
491        return new FTPClientConfig(FTPClientConfig.SYST_MVS,
492                DEFAULT_DATE_FORMAT, null, null, null, null);
493    }
494
495}