diff --git a/src/main/java/com/hy/java/uct/cdtocode/CDToCodeTracer.java b/src/main/java/com/hy/java/uct/cdtocode/CDToCodeTracer.java
index 318502fe4ced9a0ea1e93cc4d35a1daf77082faa..a2f336ba1c3092a89bfbad1c17aaf0906a3eb019 100644
--- a/src/main/java/com/hy/java/uct/cdtocode/CDToCodeTracer.java
+++ b/src/main/java/com/hy/java/uct/cdtocode/CDToCodeTracer.java
@@ -43,8 +43,8 @@ public class CDToCodeTracer {
 	 */
 	List<String> doc_dir_ls = new ArrayList<>();
 	// Add the document files here; several files may be added
-	doc_dir_ls.add(doc_dir + "basic-architecture.adoc");
-	doc_dir_ls.add(doc_dir + "Jetty10 Operations Guide _ The Eclipse Foundation.txt");
+	doc_dir_ls.add(doc_dir + "jetty\\basic-architecture.adoc");
+	doc_dir_ls.add(doc_dir + "jetty\\Jetty10 Operations Guide _ The Eclipse Foundation.txt");
 	// The map actually used: stores each document's path and its sentences
 	Map<String, List<String>> dir_sentences_map = DocReader.readDocs(doc_dir_ls);
 	/*
diff --git a/src/main/java/com/hy/java/uct/cdtocode/mapper/CodeRelationMapper.java b/src/main/java/com/hy/java/uct/cdtocode/mapper/CodeRelationMapper.java
index cd56b4b518c51911bb5cc339e6f04d4ad6043f08..18f5857b70c10c4d0674b770db266837e68b17b6 100644
--- a/src/main/java/com/hy/java/uct/cdtocode/mapper/CodeRelationMapper.java
+++ b/src/main/java/com/hy/java/uct/cdtocode/mapper/CodeRelationMapper.java
@@ -609,8 +609,51 @@ public class CodeRelationMapper {
 	 * Compare mapped_javaFile_inTemp with mapped_javaFile_inCls and merge their relation-based traces
 	 */
 	private static MappedFile mergeTwoMappedFiles(MappedFile mapped_javaFile_inTemp, MappedFile mapped_javaFile_inCls) {
-		// TODO Auto-generated method stub
-		return null;
+		MappedFile res = new MappedFile();
+		res.java_file_dir = mapped_javaFile_inTemp.java_file_dir;
+		for (Pair<EntityRelation, Double> P_inTemp : mapped_javaFile_inTemp.P_ls) {
+			Pair<EntityRelation, Double> P = P_inTemp;
+			// The comparison below works like a sort: compare P against each P_inCls of mapped_javaFile_inCls; if the two share the same relation, keep the one with the higher probability
+			for (Pair<EntityRelation, Double> P_inCls : mapped_javaFile_inCls.P_ls) {
+				if (P.getLeft() != null && P_inCls.getLeft() != null) {
+					if (P.getLeft().related_ent.name.equals(P_inCls.getLeft().related_ent.name) && P.getLeft().relation_type.equals(P_inCls.getLeft().relation_type)) {
+						P_inCls.getLeft().should_be_del = true;
+						// Keep the higher probability
+						if (P.getRight() < P_inCls.getRight()) {
+							P = P_inCls;
+						}
+					}
+				} else if (P.getLeft() == null && P_inCls.getLeft() == null) {
+					// If P_inCls has no EntityRelation, attach a dummy EntityRelation that marks this P_inCls to be ignored later
+					EntityRelation null_er = new EntityRelation();
+					null_er.should_be_del = true;
+					P_inCls.setLeft(null_er);
+					// Keep the higher probability
+					if (P.getRight() < P_inCls.getRight()) {
+						P = P_inCls;
+					}
+				}
+			}
+			res.P_ls.add(P);
+		}
+		// Then add every P_inCls of mapped_javaFile_inCls that was never matched
+		for (Pair<EntityRelation, Double> P_inCls : mapped_javaFile_inCls.P_ls) {
+			if (P_inCls.getLeft() == null) {
+				// If it is still null at this point, no P in P_inTemp was null, so the null Ps of P_inCls were never compared; add this P_inCls directly
+				res.P_ls.add(P_inCls);
+			} else if (!P_inCls.getLeft().should_be_del) {
+				res.P_ls.add(P_inCls);
+			}
+		}
+		// Finally, filter res and remove all dummy EntityRelations
+		for (Pair<EntityRelation, Double> P : res.P_ls) {
+			if (P.getLeft() != null) {
+				if (P.getLeft().related_ent == null) {
+					P.setLeft(null);
+				}
+			}
+		}
+		return res;
 	}
 
 	/**
@@ -619,15 +662,20 @@ public class CodeRelationMapper {
 	public static void check(Map<String, UMLClass> mapped_classes) {
 		Set<String> ClsImg_shortName_set = mapped_classes.keySet();
 		for (String ClsImg_shortName : ClsImg_shortName_set) {
+			System.out.println("======================================================");
 			UMLClass UML_class = mapped_classes.get(ClsImg_shortName);
-			for (MappedFile mapped_javaFile : UML_class.mapped_javaFile_ls) {
-				for (Pair<EntityRelation, Double> P : mapped_javaFile.P_ls) {
-					if (P.getLeft() != null) {
P.getRight() + "的概率追踪到代码中的" + mapped_javaFile.java_file_dir + ",参考其与" + P.getLeft().related_ent.name + "的" + P.getLeft().relation_type + "关系。"); - } else { - System.out.println(UML_class.getTitle() + "有" + P.getRight() + "的概率追踪到代码中的" + mapped_javaFile.java_file_dir + ",这条追踪是没有相关关系的。"); + if (UML_class.mapped_javaFile_ls.size() > 0) { + for (MappedFile mapped_javaFile : UML_class.mapped_javaFile_ls) { + for (Pair P : mapped_javaFile.P_ls) { + if (P.getLeft() != null) { + System.out.println(UML_class.getTitle() + "有" + P.getRight() + "的概率追踪到代码中的" + mapped_javaFile.java_file_dir + ",参考其与文档实体" + P.getLeft().related_ent.name + "的" + P.getLeft().relation_type + "关系。"); + } else { + System.out.println(UML_class.getTitle() + "有" + P.getRight() + "的概率追踪到代码中的" + mapped_javaFile.java_file_dir + ",这条追踪是没有相关关系的。"); + } } } + } else { + System.out.println(UML_class.getTitle() + "没有追踪到对应的代码实现。"); } } } diff --git a/src/main/java/com/hy/java/uct/util/EntityRelation.java b/src/main/java/com/hy/java/uct/util/EntityRelation.java index c5c82838b4c19acfb57021158b0c8c713a4ab463..70f11f0e4a4a56e132ee527976e568ede62a3c89 100644 --- a/src/main/java/com/hy/java/uct/util/EntityRelation.java +++ b/src/main/java/com/hy/java/uct/util/EntityRelation.java @@ -9,9 +9,16 @@ public class EntityRelation { public Entity related_ent = null; public String relation_type = ""; public double R_Entdoc_OtherEntdoc = 0.0; + /** + * 该阈值可改为任意大于0但非常接近0的正数 + */ public double and_or = 0.001; /** * PR_doc(Ent_doc,Ent'_doc) */ public double PR_doc = 0.0; + /** + * 在最后合并mappedFile时,用于检测mappedFile_forCls的关系是否要舍弃 + */ + public boolean should_be_del = false; } diff --git a/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.png b/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.png new file mode 100644 index 0000000000000000000000000000000000000000..41eca3c67c65d8cbc73f9cf4dcdd9ad5401ced2b Binary files /dev/null and b/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.png differ diff --git a/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.txt b/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.txt new file mode 100644 index 0000000000000000000000000000000000000000..2395f46f95c58a70be6e56d9b142acee12af382d --- /dev/null +++ b/src/main/resources/cdtocode/cd/cd-Apache OODT File Manager.txt @@ -0,0 +1,60 @@ +(611,446)AbstractHandler +@@@AbstractHandler +%AbstractLifeCycle +%继承¥AbstractHandler +%Handler +%继承¥@#(165,446)AbstractConnector +@@@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractConnector +%Connector +%继承¥@#(366,303)AbstractLifeCycle +@@+doStart() ++doStop() +@AbstractLifeCycle +%LifeCycle +%继承¥@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractHandler +%AbstractLifeCycle +%继承¥#(167,204)Connector +@+host: String ++port: int +@@Connector +%Buffers +%实现¥Connector +%LifeCycle +%实现¥@AbstractConnector +%Connector +%继承¥#(616,205)Handler +@@+handle(target,request,...) 
+@Handler +%LifeCycle +%实现¥@AbstractHandler +%Handler +%继承¥#(470,204)ThreadPool +@@+dispatch(Runnable) +@ThreadPool +%LifeCycle +%实现¥@#(110,19)Buffers +@@+getBuffer(size): Buffer ++returnBuffer(Buffer) +@Buffers +%Buffer +%依赖¥@Connector +%Buffers +%实现#(16,19)Buffer +@@@@Buffers +%Buffer +%依赖#(394,17)LifeCycle +@@+start() ++stop() +@@AbstractLifeCycle +%LifeCycle +%继承¥Connector +%LifeCycle +%实现¥ThreadPool +%LifeCycle +%实现¥Handler +%LifeCycle +%实现¥# \ No newline at end of file diff --git a/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.png b/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.png new file mode 100644 index 0000000000000000000000000000000000000000..a7f8ef9119841fccafbee1570f24d703c2b993e3 Binary files /dev/null and b/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.png differ diff --git a/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.txt b/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.txt new file mode 100644 index 0000000000000000000000000000000000000000..2395f46f95c58a70be6e56d9b142acee12af382d --- /dev/null +++ b/src/main/resources/cdtocode/cd/cd-Hadoop HDFS.txt @@ -0,0 +1,60 @@ +(611,446)AbstractHandler +@@@AbstractHandler +%AbstractLifeCycle +%继承¥AbstractHandler +%Handler +%继承¥@#(165,446)AbstractConnector +@@@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractConnector +%Connector +%继承¥@#(366,303)AbstractLifeCycle +@@+doStart() ++doStop() +@AbstractLifeCycle +%LifeCycle +%继承¥@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractHandler +%AbstractLifeCycle +%继承¥#(167,204)Connector +@+host: String ++port: int +@@Connector +%Buffers +%实现¥Connector +%LifeCycle +%实现¥@AbstractConnector +%Connector +%继承¥#(616,205)Handler +@@+handle(target,request,...) +@Handler +%LifeCycle +%实现¥@AbstractHandler +%Handler +%继承¥#(470,204)ThreadPool +@@+dispatch(Runnable) +@ThreadPool +%LifeCycle +%实现¥@#(110,19)Buffers +@@+getBuffer(size): Buffer ++returnBuffer(Buffer) +@Buffers +%Buffer +%依赖¥@Connector +%Buffers +%实现#(16,19)Buffer +@@@@Buffers +%Buffer +%依赖#(394,17)LifeCycle +@@+start() ++stop() +@@AbstractLifeCycle +%LifeCycle +%继承¥Connector +%LifeCycle +%实现¥ThreadPool +%LifeCycle +%实现¥Handler +%LifeCycle +%实现¥# \ No newline at end of file diff --git a/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.png b/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.png new file mode 100644 index 0000000000000000000000000000000000000000..428934aecc9ecf9a8720c537ed092891e2b21a43 Binary files /dev/null and b/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.png differ diff --git a/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.txt b/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.txt new file mode 100644 index 0000000000000000000000000000000000000000..2395f46f95c58a70be6e56d9b142acee12af382d --- /dev/null +++ b/src/main/resources/cdtocode/cd/cd-Hadoop MapReduce.txt @@ -0,0 +1,60 @@ +(611,446)AbstractHandler +@@@AbstractHandler +%AbstractLifeCycle +%继承¥AbstractHandler +%Handler +%继承¥@#(165,446)AbstractConnector +@@@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractConnector +%Connector +%继承¥@#(366,303)AbstractLifeCycle +@@+doStart() ++doStop() +@AbstractLifeCycle +%LifeCycle +%继承¥@AbstractConnector +%AbstractLifeCycle +%继承¥AbstractHandler +%AbstractLifeCycle +%继承¥#(167,204)Connector +@+host: String ++port: int +@@Connector +%Buffers +%实现¥Connector +%LifeCycle +%实现¥@AbstractConnector +%Connector +%继承¥#(616,205)Handler +@@+handle(target,request,...) 
+@Handler +%LifeCycle +%实现¥@AbstractHandler +%Handler +%继承¥#(470,204)ThreadPool +@@+dispatch(Runnable) +@ThreadPool +%LifeCycle +%实现¥@#(110,19)Buffers +@@+getBuffer(size): Buffer ++returnBuffer(Buffer) +@Buffers +%Buffer +%依赖¥@Connector +%Buffers +%实现#(16,19)Buffer +@@@@Buffers +%Buffer +%依赖#(394,17)LifeCycle +@@+start() ++stop() +@@AbstractLifeCycle +%LifeCycle +%继承¥Connector +%LifeCycle +%实现¥ThreadPool +%LifeCycle +%实现¥Handler +%LifeCycle +%实现¥# \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..c92ae4773b74b21b9aa7b209b085a9f0f86cb88b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-relation.txt @@ -0,0 +1,637 @@ +A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE +STREAMS: THE V-FASTR EXPERIMENT AS A CASE STUDY +Andrew F. Hart, Luca Cinquini, Shakeh E. Khudikyan, David R. Thompson, +Chris A. Mattmann, Kiri Wagstaff, Joseph Lazio, and Dayton Jones +Jet Propulsion Laboratory, California Institute of Technology, Pasadena, CA 91109, USA; andrew.f.hart@jpl.nasa.gov +Received 2014 March 24; accepted 2014 August 10; published 2014 December 16 +ABSTRACT +“Fast radio transients” are defined here as bright millisecond pulses of radio-frequency energy. These shortduration +pulses can be produced by known objects such as pulsars or potentially by more exotic objects such as +evaporating black holes. The identification and verification of such an event would be of great scientific value. This +is one major goal of the Very Long Baseline Array (VLBA) Fast Transient Experiment (V-FASTR), a softwarebased +detection system installed at the VLBA. V-FASTR uses a “commensal” (piggy-back) approach, analyzing +all array data continually during routine VLBA observations and identifying candidate fast transient events. Raw +data can be stored from a buffer memory, which enables a comprehensive off-line analysis. This is invaluable for +validating the astrophysical origin of any detection. Candidates discovered by the automatic system must be +reviewed each day by analysts to identify any promising signals that warrant a more in-depth investigation. To +support the timely analysis of fast transient detection candidates by V-FASTR scientists, we have developed a +metadata-driven, collaborative candidate review framework. The framework consists of a software pipeline for +metadata processing composed of both open source software components and project-specific code written +expressly to extract and catalog metadata from the incoming V-FASTR data products, and a web-based data portal +that facilitates browsing and inspection of the available metadata for candidate events extracted from the VLBA +radio data. +Key words: catalogs – methods: data analysis – pulsars: general – radio continuum: general +1. INTRODUCTION +One of the current primary goals of radio astronomy is to +explore and understand the “dynamic radio sky” (Cordes +et al. 2004). 
In contrast to generating catalogs of known +sources, this scientific thrust focuses on transient events, or +transient signals generated by persistent yet time-varying +sources. We do not yet fully understand the scope and +distribution of different transient sources, which range from +the known (e.g., active galactic nuclei, brown dwarfs, flare +stars, X-ray binaries, supernovae, gamma-ray bursts) to the +probable (e.g., exoplanets), to the possible (e.g., ET +civilizations, annihilating black holes). As noted by Cordes +et al. (2004, p.14), “most exciting would be the discovery of +new classes of sources” (italics in original). Radio telescopes +continue to increase their data collecting abilities, observing +the sky with progressively finer time resolution. Of current +particular interest is the detection and characterization of +“fast radio transients,” which last for only small fractions of a +second. +The V-FASTR experiment (Wayth et al. 2011) is one of a +new breed of radio astronomy experiments specifically +targeting fast transient radio signals. The experiment is +conducted in a fully commensal (passive) fashion, searching +for signals in the data gathered during the regular processing +activities of its host instrument. Unlike more traditional, +single-telescope observations, however, the V-FASTR +experiment simultaneously utilizes anywhere between 2 and +10 telescopes of the National Radio Astronomy Observatory +ʼs (NRAO) Very Long Baseline Array (VLBA) (Romney +2010). The VLBA consists of 10 25 m telescopes that are +positioned geographically such that no 2 are within each +otherʼs local horizon, and the V-FASTR experiment +leverages this configuration to better discriminate between +instances of terrestrial Radio Frequency Interference (RFI) +and potentially genuine astronomical pulses (Thompson +et al. 2011). +The huge volumes of raw time-series voltage data generated +by the VLBA in the course of its operation make storing the +full record of an entire observing session infeasible at the +present time. As a consequence, considerable effort has been +devoted to developing and fine-tuning algorithms for the realtime +identification of potentially interesting signals in the noisy +and often incomplete data (Thompson et al. 2011; Wayth et al. +2012). All data selected by the real-time processing step is +subsequently inspected, on a daily basis, by members of the +geographically distributed V-FASTR science team and either +discarded as spurious or archived offline for full analysis at a +later date. +The V-FASTR experiment must therefore operate within +several important resource constraints: the inability to archive +the full observational record due to space constraints, and a +practical workload constraint upon the human analysts +reviewing candidate detections. To address the latter, we have +developed a metadata-driven, collaborative candidate review +framework for the V-FASTR experiment. The framework +comprises a set of software components dedicated to the +automatic capture and organization of metadata describing the +candidate events identified as interesting by the automated +algorithms, and an online environment for the collaborative +perusal and inspection of related imagery data by the V-FASTR +analysis team. +The rest of this paper describes the system as follows. In +Section 2 we describe our project in a more general context. +Section 3 presents the methodology and an architectural +description of the system. 
We follow with an evaluation of +The Astronomical Journal, 149:23 (7pp), 2015 January doi:10.1088/0004-6256/149/1/23 +© 2015. The American Astronomical Society. All rights reserved. +1 +our experience deploying the framework in Section 4, and +offer conclusions and future directions for the work in +Section 5. +2. BACKGROUND +To better understand the context of the system implementation +presented in Section 3, we first briefly introduce the VFASTR +experiment and describe the development of scientific +data systems at the NASA Jet Propulsion Laboratory (JPL). +We then describe the Object Oriented Data Technology +(OODT) project, an open source information integration +platform that plays a central role in our framework. Finally, +we briefly touch upon several related efforts at developing +online tools to collaboratively classify and validate scientific +observations. +2.1. V-FASTR: The VLBA Fast TRansients Experiment +V-FASTR (VLBA Fast TRansients) is a data analysis +system used by the VLBA to detect candidate fast transient +events. Principal investigators submit observing proposals to +the VLBA targeted at galaxies, supernovae, quasars, pulsars, +and more. V-FASTR analyzes all data collected by the VLBA +as part of routine processing and produces a nightly list of +candidates identified within the data processed that day. The +raw data for each candidate is temporarily saved in case it is +needed to interpret or follow up on a particularly promising or +unusual detection. However, the raw data consumes significant +disk space and therefore the candidate list must be reviewed on +a timely basis by experts. False positives can be deleted and +their disk space reclaimed, while truly interesting events can be +re-processed to enable the generation of a sky image to localize +the source of the signal. Software tools that streamline and +simplify this review process are therefore highly valued by +candidate reviewers and can have a positive impact on other +similar efforts throughout the world. +2.2. Data System Development at JPL +The Data Management Systems and Technologies group at +the JPL develops software ground data systems to support +NASA science missions. These pipelines are specifically +optimized to support the data-intensive and computationallyintensive +processing steps often needed to convert raw remotesensing +observations into higher level data products at scale so +that they can be interpreted by the scientists. The process +almost always involves developing a close collaboration with +project scientists to obtain an understanding of the processing +algorithms involved, a sense of the scale and throughput +requirements, and other operational constraints of the expected +production environment. +Over the years the group has developed a diverse portfolio of +data system experience across a broad spectrum of domains +including earth and climate science (Mattmann et al. 2009; +Hart et al. 2011; Tran et al. 2011), planetary science, +astrophysics, snow hydrology, radio astronomy, cancer +research (Crichton et al. 2001), and pediatric intensive care +(Crichton et al. 2011). +2.3. 
Open Source and OODT +One of the products of this long track record of experience in +the realm of scientific data processing systems is a suite of +software components known as OODT1 originally arose out of +a desire on the part of NASAʼs Office of Space Science to +improve the return on investment for individual mission data +systems by leveraging commonalities in their design to create a +reusable platform of configurable components, on top of which +mission-specific customizations could be made. OODT thus +represents both an architecture and a reference implementation. +Its components communicate with one another over standard, +open protocols such as XML-RPC2 and can be used either +individually, or coupled together to form more complex data +processing pipelines. +In 2009 OODT began the transition from a JPL-internal +development project to a free and open source software project +at the Apache Software Foundation (ASF).3 Graduating to a +top-level project in 2011, OODT has since undergone several +public releases at the ASF and is in use by a varied group of +scientific and commercial endeavors. As we will describe +further in Section 3, several OODT components form the core +platform of our candidate validation framework. The ready +availability of OODT components under a liberal license, +combined with their substantial pedigree was appealing to our +project both for time and budgetary considerations. +2.4. Related Work +In the following section we identify several ongoing efforts +that also utilize online tools to assist in the collaborative review +and classification of scientific observations. +2.4.1. Astropulse +Astropulse is part of a series of sky surveys for radio pulses +being conducted by the Search for Extraterrestrial Intelligence +(SETI) at the University of Berkeley (Siemion et al. 2010). +The Astropulse project conducts a survey of the sky from the +Arecibo Observatory in Puerto Rico, searching for short +(microsecond) broadband radio frequency pulses. While +Astropulseʼs use of Areciboʼs enormous single dish telescope +affords excellent sensitivity, V-FASTRʼs ability to perform +continent-scale baseline interferometery yields much greater +positional accuracy when attempting to localize the source of a +signal. +As a variant of the SETI@home project, Astropulse utilizes the +same distributed, collaborative volunteer computing infrastructure +accumulated over the years by that effort to perform a +number of computationally intense transformations and calculations +of the data in an attempt to better classify the origin of any +signals detected. The use of volunteer computing to perform units +of computational work is an appealing approach that obviates the +need to directly acquire sufficient hardware for the processing +demands. However, the fully automated nature of the approach is +not a natural fit for V-FASTRʼs manual review requirement. +2.4.2. Galaxy Zoo +GalaxyZoo4 is an Internet-based project that relies on the +help of volunteers to classify a very large database of galaxy +images recorded by either the Sloan Digital Sky Survey or the +Hubble telescope. Users are asked to classify galaxies based on +1 Apache OODT: http://oodt.apache.org/ +2 XML-RPC: http://xmlrpc.scripting.com/spec.html +3 http://apache.org/ +4 Galaxy Zoo: http://www.galaxyzoo.org/ +2 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. +shape, color and direction of rotation, and report on possible +unidentified features. 
The rationale behind human intervention +is that manual classification is more accurate and insightful +than any algorithm that can currently by undertaken by an +automatic program. To date, the project has met with success +that far exceeded expectations: more than 250,000 volunteers +have helped classify millions of images, resulting in the +confirmation of important scientific hypothesis, the formulation +of new ones, and the discovery of new interesting objects. +While Galaxy Zooʼs tactic of appealing to volunteers to +mitigate the challenge of image classification at scale is +attractive, the paradigm does not translate well to the V-FASTR +setting due to differences in the nature of the archives between +the two projects. Whereas Galaxy Zoo permits its volunteer +reviewers to leisurely peruse and mine a largely static image +archive, the rapidly growing data volumes associated with +ongoing V-FASTR observations dictate that reviews must be +regularly scheduled to keep the project within its resource +limits. +2.4.3. Foldit: The Protein Folding Game +Foldit (Cooper et al. 2010) is a collaborative online protein +folding game developed by the Center for Game Science +at the University of Washington, and it represents a +“crowd-sourced” attempt to solve the computationally challenging +task of predicting protein structure. Proteins, chains of +amino acids, play a key role in a wide range of human diseases, +but comparatively little is known about how they contort +themselves into the specific shapes that determine their +function. Because of the scale and complexity of the challenge, +the researchers behind Foldit have turned to the puzzle-solving +capabilities of human beings for assistance. After learning the +rules on simple challenges, players compete against one +another to design alternative protein structures, with the goal +of arriving at an arrangement that minimizes the total energy +needed to maintain the shape. +Foldit has created an environment in which the unknown and +diverse strategies of its human participants become a core +strength. Furthermore, by presenting the scientific activity as a +competitive game, the project, which currently boasts over +400,000 players, has shown that it is possible to recruit and +leverage human processing power at scale. This provides an +interesting model for other projects, including V-FASTR, +which at some point may rely upon a human element to +augment or improve automated processes. +3. IMPLEMENTATION +In this section we provide details on the implementation of +our metadata-driven framework for online review of V-FASTR +candidate detection events. We describe our methodology and +the considerations that informed our design, followed by a +presentation of the system architecture. +3.1. Development Methodology +Several factors influenced the development process and have +left their imprint on the final architecture. We feel that our +implementation is uniquely suited to the needs of the VFASTR +project precisely because these factors were identified +early on and were thus able to influence all aspects of the +design process. +3.1.1. Collaboration +As described in Section 2, our group has developed +substantial experience in the design and implementation of +data systems for a broad range of scientific domains. 
In each
+case, a close working relationship with members of the project
+science team was an essential ingredient to the success of the
+project, and our experience developing an online candidate
+review framework for V-FASTR was no different. As software
+engineers familiar with the challenges inherent in scientific data
+management, our intuitions about the technical challenges of
+the system served us well in scoping out the project timeline.
+However, it was our early and regular communication with
+members of the V-FASTR science team that was critical to
+obtaining the domain knowledge necessary to make accurate
+assumptions, and to the early identification of issues. The
+current system architecture, covering both the back and front
+end elements, is a direct result of an ongoing feedback loop
+between the science and software teams.
+3.1.2. Constraints
+As mentioned in Section 2, V-FASTR is a commensal
+experiment that scans for fast transients in data that is already
+being collected as part of the regular third-party use of the
+VLBA instrument. As such, the experiment maintains a "guest"
+status on the NRAO computing infrastructure. Consequently,
+care must consistently be taken not to overtax NRAO system
+resources, including disk storage, CPU time, and network
+bandwidth. These physical constraints motivated many of the
+architectural decisions described in the following sections.
+Each V-FASTR data product may contain hundreds of files,
+rooted at a top-level job directory, and includes two types of
+products: filterbank data (up to ~100 GB per job) and
+baseband voltage data (up to ~10 GB per job). The total data
+storage capacity available to V-FASTR is just ~8 TB, enough
+to contain ~800 jobs of ~10 GB each (on average). Because
+products are produced at an average rate of ~10–20 per day (but
+sometimes in the hundreds), the storage would be exhausted
+within a few weeks unless products are periodically reviewed
+by the science team analysts. During review, each candidate is
+either flagged for higher-resolution processing (and saved) or
+discarded as a false positive and the disk space reclaimed (see
+Figure 1 for an overview of the average data volumes per job at
+different processing stages). The desire to provide analysts with
+a streamlined method for this review process is at the very core
+of our design.
+Similarly, the network bandwidth constraints of the host led
+us to a data transfer configuration that focused on metadata
+rather than requiring the complete transfer of raw, unreviewed,
+and possibly spurious detection data over the Internet. Instead,
+metadata sufficient to describe the salient characteristics of a
+candidate event to a trained analyst was transferred into our
+candidate review framework. This careful selection process had
+the beneficial side effect of greatly limiting the size of the
+transferred products, allowing for a considerably longer
+retention period on the ~10 TB archive hosted at JPL.
+Finally, security constraints were also critically important to
+the design, particularly because the system spans two separate
+security domains: NRAO and JPL. To comply with the security
+requirements of the host system, data transfer was configured
+on the NRAO system to allow read-only operations and was
+made accessible only to clients originating from the JPL domain.
+Furthermore, on the front-end, the functionality
+exposed by the web portal component interacted only with
+the local metadata archive, eliminating the possibility of
+corruption or inappropriate access to the raw observational
+data.
+3.2. Architecture
+As previously mentioned, the candidate review framework is
+driven by metadata describing the candidate events to be
+reviewed by V-FASTR analysts. To communicate this data
+from the raw source repository at the NRAO to an analyst using
+a browser anywhere in the world, we developed a software
+framework consisting of two principal components: a metadata
+pipeline that manages the capture, transfer, and storage of
+metadata annotations, and a web portal which provides analysts
+with a convenient, context-rich environment for efficiently
+classifying candidate events.
+3.2.1. Metadata Pipeline
+On the JPL side, the V-FASTR data products are processed
+through a metadata extraction and data archiving pipeline that
+eventually leads to the event candidates being available for
+inspection on the web portal. The pipeline is composed of three
+major software components: rsync, the OODT CAS Crawler,
+and the OODT File Manager, depicted in Figure 2.
+rsync. Data products are automatically transferred from the
+NRAO staging area to the JPL server using rsync. rsync is a
+popular application and data transfer protocol that can
+synchronize the content of a directory tree between two
+servers with minimal human intervention. It was chosen
+because of its simplicity, high performance, reliability, and
+wide range of configuration options. Through rsync, files are
+transferred in compressed format using delta encoding,
+meaning that only the file differences are sent in
+subsequent transfers. For this project, an rsync server
+daemon was set up on the NRAO side to expose the data
+staging area where the products are collected. For security
+reasons, the daemon was restricted to allow read-only
+operations to clients originating from a designated JPL IP
+address. On the JPL side, an rsync client was set up to run
+hourly as a system cron job, transferring products to the JPL
+archive area. To minimize bandwidth usage, the client only
+transfers a very small subset of the data comprising a product
+directory tree, namely the detection images and the output
+and calibration files containing the metadata needed by the
+web portal. On average, this represents a reduction of the
+data product size by a factor of 3.5 × 10³: from an average
+size of ~35 GB on the NRAO server (for a product with
+several detections), to ~10 MB on the JPL server. The rsync
+data transfer rates between the two servers were measured to
+be around ~2 MB s⁻¹, more than enough to transfer between
+10 and 20 data products per day.
+CAS Crawler. Once the data products are transferred to the
+JPL server, they are automatically detected by the OODT
+CAS Crawler daemon, which runs at sub-hour time intervals
+to pick up new products as soon as they become available.
+The Crawler is responsible for notifying the OODT File
+Manager and therefore starting the product ingestion process.
+For this deployment, the Crawler was configured to send a
+signal only if two preconditions are both satisfied: (1) a
+similarly named product does not already exist in the File
+Manager catalog and (2) the product directory contains a
+special marker file indicating that the product has been
+processed by the mail program, and therefore is in a complete
+state (i.e., no files are missing).
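The two-precondition gate described above is simple to express in code. What follows is a minimal, self-contained Java sketch of that gating logic; it is not the OODT Crawler API, and the marker-file name and the catalog lookup shown here are hypothetical placeholders.

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Set;

// Illustrative gate for the two ingest preconditions described above.
// Not the OODT API: MARKER_FILE and the catalog Set are hypothetical.
public class IngestGate {
    // Hypothetical marker file written once a product directory is complete.
    static final String MARKER_FILE = "INGEST_READY";

    // Precondition (1): no similarly named product is already catalogued.
    // Precondition (2): the completion marker file is present.
    static boolean readyForIngest(Path productDir, Set<String> catalogedProductNames) {
        boolean notCataloged = !catalogedProductNames.contains(productDir.getFileName().toString());
        boolean complete = Files.exists(productDir.resolve(MARKER_FILE));
        return notCataloged && complete;
    }
}

Only when both checks pass would the crawler notify the File Manager; until the marker file appears, a partially transferred product directory is never ingested, however many rsync passes it takes to fill it.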
+CAS File Manager. The OODT CAS File Manager is a
+customizable software component that is responsible for
+processing and archiving a data product, making it available
+for query and access to clients. For this project, the File
+Manager was deployed with the default Apache Lucene
+metadata back-end, and configured to archive products
+in-place, i.e., without moving them to a separate archive
+directory, otherwise the rsync process would transfer them
+again from the NRAO server. Additionally, we leveraged the
+extensibility of the OODT framework by configuring the File
+Manager with custom metadata extractors that were
+purposely written to parse the information contained in the
+V-FASTR output and calibration files. Information is
+extracted at the three levels that comprise the hierarchy of
+a V-FASTR data product: job, scan, and event. Additionally,
+a numerical algorithm was written to assign each pair of
+available images (-det.jpg and -dedisp.jpg) to the event that
+generated them.
+Figure 1. Depiction of the full V-FASTR data flow with volume estimates (per job) at each stage. The candidate review framework (both metadata pipeline and web
+portal components) interact with the metadata and derived products repository at the intersection of A and B above.
+In general, a File Manager can store metadata in its back-end
+catalog as different object types. Each object type is defined to
+contain multiple metadata fields, where each field is composed
+of a named key associated with one or more string values. For
+this project, the decision was made to maintain a one-to-one
+correspondence between a data product and the corresponding
+metadata ingested into the catalog. So rather than defining three
+object types for jobs, scans, and events, a single object type
+was used, holding all information for a data product in a single
+container with dynamically named keys that are encoded to
+contain the scan and event numbers. This decision was
+motivated by the desire to simplify and optimize the querying
+of information by the web portal client, since all metadata for a
+product is retrieved through a single request to the File
+Manager. As a consequence, the default Apache Lucene
+metadata catalog implementation had to be slightly modified
+to allow for the ingestion of dynamically named metadata
+fields.
+3.2.2. Web Portal
+The second major component of the candidate review
+framework is an interactive web portal. The primary purpose
+of the portal is to provide a convenient online environment for
+the location-independent perusal and assessment of potential
+candidates in context. The portal provides V-FASTR analysts
+with the ability to quickly navigate through the available
+information to identify candidates worthy of further inspection
+on a familiar web platform.
+The portal has been implemented as a PHP web application
+using the Apache OODT Balance web framework running on
+top of the Apache HTTPD Web Server. OODT Balance was
+chosen here for its ability to easily integrate with the OODT
+components in the back-end metadata pipeline, namely the
+OODT CAS File Manager described earlier. Furthermore, the
+flexible, modular approach of the framework allowed us to
+quickly connect the web portal to the metadata repository and
+rapidly begin constructing the necessary views specific to the
+V-FASTR candidate review and validation use cases.
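The single-object-type design described above can be pictured as one flat key-value container per job, with the scan/event hierarchy folded into dynamically named field names. Below is an illustrative Java sketch of that idea under an assumed key scheme (scan_<n>_event_<m>_<field>); the project's actual encoding is not specified in the text, so the scheme here is hypothetical.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustrative container: all metadata for one data product (one job)
// in a single map, with scan and event numbers encoded into field names.
// The key scheme is a hypothetical stand-in for the project's encoding.
public class ProductMetadata {
    private final Map<String, List<String>> fields = new HashMap<>();

    // e.g. put(2, 7, "DetectionImage", "candidate-det.jpg")
    public void put(int scan, int event, String field, String value) {
        fields.computeIfAbsent(key(scan, event, field), k -> new ArrayList<>()).add(value);
    }

    public List<String> get(int scan, int event, String field) {
        return fields.getOrDefault(key(scan, event, field), List.of());
    }

    // Encode the job/scan/event hierarchy into the field name itself.
    private static String key(int scan, int event, String field) {
        return "scan_" + scan + "_event_" + event + "_" + field;
    }
}

Because every field of a job lives in one container, the portal can fetch all metadata for a product with a single File Manager request and regroup it by scan and event on the client side, which is exactly the querying pattern the one-to-one design optimizes for.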
As Figure 3 shows, the web portal offers a variety of views
+of the available metadata which are hierarchically organized to
+match the conceptual relationships in the data. At the highest
+level, a job or run might consist of multiple scans, each of
+which may itself contain multiple detection event candidates.
+This hierarchy, expressed in the metadata, is preserved in the
+layout of the portal views, and breadcrumb navigation is
+provided to facilitate orientation within the nested structure.
+At the level of an individual event candidate (Figure 3,
+middle image), two graphical representations of the event are
+available to assist analysts in classifying the nature of the
+signal. These images are generated automatically as part of the
+initial candidate identification process (Wayth et al. 2011), and
+they provide a trained analyst the necessary structural clues
+needed to rapidly assess the received signal as being genuinely
+extraterrestrial in origin or merely a product of RFI.
+To support both metadata browsing in context and the desire
+for an analyst to be able to rapidly peruse the image
+representations of an entire job (many events in many scans)
+at once, a compromise was struck whereby, for each job, a
+portal user may select a traditional, hierarchical navigation or a
+flattened view in which all of the (possibly numerous) event
+candidates are presented simultaneously on screen and can be
+accessed serially simply by scrolling the view.
+Figure 2. Component diagram for the metadata pipeline component of the
+V-FASTR candidate review framework.
+Together, the metadata pipeline and the web portal constitute
+an end-to-end framework for capturing, archiving, and
+presenting metadata about detected transient event candidates
+to V-FASTR scientists. Furthermore, by providing a reliable
+process and flexible interface, the system directly streamlines
+the analysis process, boosting the overall efficiency of the
+project.
+4. EVALUATION
+As we have described in the previous section, the candidate
+review framework embraces the model of online collaborative
+validation of fast transient candidates by a team of geographically
+dispersed analysts, and improves the efficiency with
+which analysts may classify observational data. In this section
+we describe the early results of our experience with the
+operational deployment of the framework, as well as highlight
+several areas for the evolution of the tool to further enhance its
+utility.
+4.1. Experience
+The initial deployment of the collaborative review framework
+for operational use by the V-FASTR science team was
+made in early summer 2012. The immediate feedback was
+largely positive: analysts praised the capabilities of the system,
+the general improved accessibility afforded by a web-based
+user interface, and the newfound capability to easily navigate
+rapidly through all detections in a given job, or peruse the
+different levels (scans and events) within a job individually.
+The biggest initial complaint with the system was that too
+many mouse clicks were required to complete an analysis of all
+of the candidates in an entire job.
+A consequence of the iterative feedback loop that developed
+between the software and science teams (described further in
+Section 3) was that suggestions for improvements were
+repeatedly made, tracked, and acted upon.
+This process resulted
+in an updated release approximately every two weeks
+during the first few months of the deployment. Suggestions for
+improvements included the addition of various metadata fields
+identified as critical to the classification task, updates to the
+visual organization of the elements of the web portal views, and
+a relentless focus on reducing the number of mouse clicks
+required on the part of analyst users.
+By the time of this writing, the V-FASTR portal has been
+running operationally for several weeks, and we can draw some
+early conclusions on the usefulness of the system. Overall, as
+reported by the science team, the project has accomplished
+its broad goal of facilitating the collaborative task of
+inspecting and screening radio-transient
+events. By extracting all relevant metadata from the latest data
+products, and presenting it on the web portal in a concise
+fashion, scientists can now execute their tasks more efficiently,
+compared to earlier times when they had to log onto a terminal
+and analyze the raw data manually. Additionally, the online
+availability of all data and metadata through a browser interface
+(as opposed to an ssh terminal) has allowed for greater
+flexibility with regard to when and where evaluations can be
+performed, including for the first time on a mobile device.
+4.2. Evolution
+On the whole, the ability to interact with the totality of the
+candidate data and metadata through a browser interface has
+greatly expanded the analysts' ability to perform their tasks
+with greater flexibility regarding when and where evaluations
+can be performed. This includes, for the first time, anecdotal
+accounts of an analyst reviewing candidates from a mobile
+device.
+With this freedom, in turn, has come a number of feature
+requests which can be taken together to form a roadmap of
+sorts for the evolution of the framework. Now that the
+interaction with candidate metadata has transitioned to the
+browser, the science team has identified three key features they
+feel would complete the transition and entirely replace the prior
+ad-hoc methods for coordinating the analysts' activities:
+Job assignment. As mentioned in Section 3, the timely
+review of detection candidates is critical to remaining within
+the resource constraints imposed upon the experiment. At the
+moment, review jobs are assigned to analysts via email.
+Augmenting the web portal with the ability to identify an
+individual analyst would enable the presentation of
+a prioritized list of that analyst's outstanding review tasks.
+Effort Tracking. Along the same lines, it is important to
+spread the analysis load evenly across the science team, since
+no one person is performing the analysis as his or her full-time
+job. Augmenting the framework with the ability to track
+the analysis contributions of individual users over time
+would assist in the equitable scheduling of future
+review jobs.
+In-browser archiving.
When an analyst determines a
+candidate event merits high-resolution follow-up, the last
+step is to archive the associated raw data so that it can be
+evaluated at a later date. Currently, due to the security
+restrictions permitting read-only access to external connections
+to the archive at the NRAO (described in Section 3),
+this process is handled out-of-band by the analyst logging
+into an NRAO machine and archiving the appropriate data
+manually. It is possible that, with the identity management
+features discussed in the previous two items (and the
+associated auditing capabilities that they could entail), the
+restrictions might be negotiated to the point that certain
+defined activities (such as archiving a single job directory)
+could be initiated from within the portal environment.
+Figure 3. Screen shots of the initial version of the web portal component. From left to right: the portal home page displaying recent jobs and associated event counts,
+image metadata associated with an individual event candidate, full metadata listing, including associated scans, for an observation job.
+5. CONCLUSION
+V-FASTR, and commensal operations more generally, are
+particularly challenging experiments due to extreme data
+volume and real-time requirements. Processing occurs continually,
+and the data flow must be coordinated across multiple
+physical locations, with transport mechanisms ranging from
+FedEx transport of disks from the antennas, to high-bandwidth
+interconnects (the correlator and transient detection systems),
+to daily rsync over IP (the ska-dc mirror), to distributed WWW
+protocols (manual review, which is performed by analysts on
+three continents). Various components of the system operate on
+millisecond, hourly, and daily clocks, and all components must
+continue operating since there is very little margin for buffer
+resources. In addition, the data processing components are
+highly heterogeneous, with human experts playing their own
+role as scheduled pattern recognition engines in the overall
+architecture. By facilitating timely review, and reducing the
+learning curve for new reviewers, the V-FASTR portal will
+play a critical role in keeping the data flowing and making the
+system sustainable in the long term.
+This effort was supported by the Jet Propulsion Laboratory,
+managed by the California Institute of Technology under a
+contract with the National Aeronautics and Space
+Administration.
+REFERENCES
+Cooper, S., Khatib, F., & Treuille, A. 2010, Natur, 466, 756–60
+Cordes, J., Lazio, T., & McLaughlin, M. 2004, NewAR, 48, 1459–72
+Crichton, D., Kincaid, H., Downing, G., Srivastava, S., & Hughes, J. S. 2001,
+in Proc. of the 14th IEEE Symp. on Computer-Based Medical Systems, An
+Interoperable Data Architecture for Data Exchange in a Biomedical
+Research Network (Piscataway, NJ: IEEE), 65–72
+Crichton, D., Mattmann, C., Hart, A., et al. 2011, in Proc. of the 24th
+IEEE Symp. on Computer-Based Medical Systems, An Informatics
+Architecture for the Virtual Pediatric Intensive Care Unit (Piscataway,
+NJ: IEEE), 1–6
+Hart, A., Goodale, C., Mattmann, C., et al. 2011, in Proc. of the 2nd Int.
+Workshop on Software Engineering for Cloud Computing, A Cloud-enabled
+Regional Climate Model Evaluation System (New York:
+ACM), 43–49
+Mattmann, C., Crichton, D., Medvidovic, N., & Hughes, J. S. 2006, in Proc.
+2006 Int. Conf. on Software Engineering, A Software Architecture-based
+Framework for Highly Distributed and Data Intensive Scientific
+Applications (New York: ACM), 721–30
+Mattmann, C., Freeborn, D., Crichton, D., et al. 2009, in Proc. IEEE Int. Conf.
+on Space Mission Challenges for Information Technology, A Reusable
+Process Control System Framework for the Orbiting Carbon Observatory
+and NPP Sounder PEATE Missions (Piscataway, NJ: IEEE), 165–72
+Romney, J. D.
2010, NRAO, http://www.vlba.nrao.edu/astro/obstatus/current/ +obssum.html. +Siemion, A., von Korff, J., McMahon, P., Korpela, E., & Werthimer, D. 2010, +AcAau, 67, 1342–9 +Thompson, D., Wagstaff, K., Brisken, W., et al. 2011, ApJ, 735, 98 +Tran, J., Cinquini, L., Mattmann, C., et al. 2011, in Evaluating Cloud +Computing in the NASA DESDynI Ground Data System Proc. of the II +International Workshop on Software Engineering for Cloud Computing +(New York: ACM), 36–42 +Wayth, R., Brisken, W., Deller, A., et al. 2011, ApJ, 735, 97 +Wayth, R., Tingay, S., & Deller, A. 2012, ApJL, 753, L36 +7 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-simEnts.txt new file mode 100644 index 0000000000000000000000000000000000000000..c92ae4773b74b21b9aa7b209b085a9f0f86cb88b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY-simEnts.txt @@ -0,0 +1,637 @@ +A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE +STREAMS: THE V-FASTR EXPERIMENT AS A CASE STUDY +Andrew F. Hart, Luca Cinquini, Shakeh E. Khudikyan, David R. Thompson, +Chris A. Mattmann, Kiri Wagstaff, Joseph Lazio, and Dayton Jones +Jet Propulsion Laboratory, California Institute of Technology, Pasadena, CA 91109, USA; andrew.f.hart@jpl.nasa.gov +Received 2014 March 24; accepted 2014 August 10; published 2014 December 16 +ABSTRACT +“Fast radio transients” are defined here as bright millisecond pulses of radio-frequency energy. These shortduration +pulses can be produced by known objects such as pulsars or potentially by more exotic objects such as +evaporating black holes. The identification and verification of such an event would be of great scientific value. This +is one major goal of the Very Long Baseline Array (VLBA) Fast Transient Experiment (V-FASTR), a softwarebased +detection system installed at the VLBA. V-FASTR uses a “commensal” (piggy-back) approach, analyzing +all array data continually during routine VLBA observations and identifying candidate fast transient events. Raw +data can be stored from a buffer memory, which enables a comprehensive off-line analysis. This is invaluable for +validating the astrophysical origin of any detection. Candidates discovered by the automatic system must be +reviewed each day by analysts to identify any promising signals that warrant a more in-depth investigation. To +support the timely analysis of fast transient detection candidates by V-FASTR scientists, we have developed a +metadata-driven, collaborative candidate review framework. The framework consists of a software pipeline for +metadata processing composed of both open source software components and project-specific code written +expressly to extract and catalog metadata from the incoming V-FASTR data products, and a web-based data portal +that facilitates browsing and inspection of the available metadata for candidate events extracted from the VLBA +radio data. +Key words: catalogs – methods: data analysis – pulsars: general – radio continuum: general +1. 
INTRODUCTION +One of the current primary goals of radio astronomy is to +explore and understand the “dynamic radio sky” (Cordes +et al. 2004). In contrast to generating catalogs of known +sources, this scientific thrust focuses on transient events, or +transient signals generated by persistent yet time-varying +sources. We do not yet fully understand the scope and +distribution of different transient sources, which range from +the known (e.g., active galactic nuclei, brown dwarfs, flare +stars, X-ray binaries, supernovae, gamma-ray bursts) to the +probable (e.g., exoplanets), to the possible (e.g., ET +civilizations, annihilating black holes). As noted by Cordes +et al. (2004, p.14), “most exciting would be the discovery of +new classes of sources” (italics in original). Radio telescopes +continue to increase their data collecting abilities, observing +the sky with progressively finer time resolution. Of current +particular interest is the detection and characterization of +“fast radio transients,” which last for only small fractions of a +second. +The V-FASTR experiment (Wayth et al. 2011) is one of a +new breed of radio astronomy experiments specifically +targeting fast transient radio signals. The experiment is +conducted in a fully commensal (passive) fashion, searching +for signals in the data gathered during the regular processing +activities of its host instrument. Unlike more traditional, +single-telescope observations, however, the V-FASTR +experiment simultaneously utilizes anywhere between 2 and +10 telescopes of the National Radio Astronomy Observatory +ʼs (NRAO) Very Long Baseline Array (VLBA) (Romney +2010). The VLBA consists of 10 25 m telescopes that are +positioned geographically such that no 2 are within each +otherʼs local horizon, and the V-FASTR experiment +leverages this configuration to better discriminate between +instances of terrestrial Radio Frequency Interference (RFI) +and potentially genuine astronomical pulses (Thompson +et al. 2011). +The huge volumes of raw time-series voltage data generated +by the VLBA in the course of its operation make storing the +full record of an entire observing session infeasible at the +present time. As a consequence, considerable effort has been +devoted to developing and fine-tuning algorithms for the realtime +identification of potentially interesting signals in the noisy +and often incomplete data (Thompson et al. 2011; Wayth et al. +2012). All data selected by the real-time processing step is +subsequently inspected, on a daily basis, by members of the +geographically distributed V-FASTR science team and either +discarded as spurious or archived offline for full analysis at a +later date. +The V-FASTR experiment must therefore operate within +several important resource constraints: the inability to archive +the full observational record due to space constraints, and a +practical workload constraint upon the human analysts +reviewing candidate detections. To address the latter, we have +developed a metadata-driven, collaborative candidate review +framework for the V-FASTR experiment. The framework +comprises a set of software components dedicated to the +automatic capture and organization of metadata describing the +candidate events identified as interesting by the automated +algorithms, and an online environment for the collaborative +perusal and inspection of related imagery data by the V-FASTR +analysis team. +The rest of this paper describes the system as follows. 
In +Section 2 we describe our project in a more general context. +Section 3 presents the methodology and an architectural +description of the system. We follow with an evaluation of +The Astronomical Journal, 149:23 (7pp), 2015 January doi:10.1088/0004-6256/149/1/23 +© 2015. The American Astronomical Society. All rights reserved. +1 +our experience deploying the framework in Section 4, and +offer conclusions and future directions for the work in +Section 5. +2. BACKGROUND +To better understand the context of the system implementation +presented in Section 3, we first briefly introduce the VFASTR +experiment and describe the development of scientific +data systems at the NASA Jet Propulsion Laboratory (JPL). +We then describe the Object Oriented Data Technology +(OODT) project, an open source information integration +platform that plays a central role in our framework. Finally, +we briefly touch upon several related efforts at developing +online tools to collaboratively classify and validate scientific +observations. +2.1. V-FASTR: The VLBA Fast TRansients Experiment +V-FASTR (VLBA Fast TRansients) is a data analysis +system used by the VLBA to detect candidate fast transient +events. Principal investigators submit observing proposals to +the VLBA targeted at galaxies, supernovae, quasars, pulsars, +and more. V-FASTR analyzes all data collected by the VLBA +as part of routine processing and produces a nightly list of +candidates identified within the data processed that day. The +raw data for each candidate is temporarily saved in case it is +needed to interpret or follow up on a particularly promising or +unusual detection. However, the raw data consumes significant +disk space and therefore the candidate list must be reviewed on +a timely basis by experts. False positives can be deleted and +their disk space reclaimed, while truly interesting events can be +re-processed to enable the generation of a sky image to localize +the source of the signal. Software tools that streamline and +simplify this review process are therefore highly valued by +candidate reviewers and can have a positive impact on other +similar efforts throughout the world. +2.2. Data System Development at JPL +The Data Management Systems and Technologies group at +the JPL develops software ground data systems to support +NASA science missions. These pipelines are specifically +optimized to support the data-intensive and computationallyintensive +processing steps often needed to convert raw remotesensing +observations into higher level data products at scale so +that they can be interpreted by the scientists. The process +almost always involves developing a close collaboration with +project scientists to obtain an understanding of the processing +algorithms involved, a sense of the scale and throughput +requirements, and other operational constraints of the expected +production environment. +Over the years the group has developed a diverse portfolio of +data system experience across a broad spectrum of domains +including earth and climate science (Mattmann et al. 2009; +Hart et al. 2011; Tran et al. 2011), planetary science, +astrophysics, snow hydrology, radio astronomy, cancer +research (Crichton et al. 2001), and pediatric intensive care +(Crichton et al. 2011). +2.3. 
Open Source and OODT +One of the products of this long track record of experience in +the realm of scientific data processing systems is a suite of +software components known as OODT,1 which originally arose out of +a desire on the part of NASAʼs Office of Space Science to +improve the return on investment for individual mission data +systems by leveraging commonalities in their design to create a +reusable platform of configurable components, on top of which +mission-specific customizations could be made. OODT thus +represents both an architecture and a reference implementation. +Its components communicate with one another over standard, +open protocols such as XML-RPC2 and can be used either +individually, or coupled together to form more complex data +processing pipelines. +In 2009 OODT began the transition from a JPL-internal +development project to a free and open source software project +at the Apache Software Foundation (ASF).3 Graduating to a +top-level project in 2011, OODT has since undergone several +public releases at the ASF and is in use by a varied group of +scientific and commercial endeavors. As we will describe +further in Section 3, several OODT components form the core +platform of our candidate validation framework. The ready +availability of OODT components under a liberal license, +combined with their substantial pedigree, was appealing to our +project both for time and budgetary considerations. +2.4. Related Work +In the following subsections we identify several ongoing efforts +that also utilize online tools to assist in the collaborative review +and classification of scientific observations. +2.4.1. Astropulse +Astropulse is part of a series of sky surveys for radio pulses +being conducted by the Search for Extraterrestrial Intelligence +(SETI) group at the University of California, Berkeley (Siemion et al. 2010). +The Astropulse project conducts a survey of the sky from the +Arecibo Observatory in Puerto Rico, searching for short +(microsecond) broadband radio frequency pulses. While +Astropulseʼs use of Areciboʼs enormous single dish telescope +affords excellent sensitivity, V-FASTRʼs ability to perform +continent-scale baseline interferometry yields much greater +positional accuracy when attempting to localize the source of a +signal. +As a variant of the SETI@home project, Astropulse utilizes the +same distributed, collaborative volunteer computing infrastructure +accumulated over the years by that effort to perform a +number of computationally intense transformations and calculations +of the data in an attempt to better classify the origin of any +signals detected. The use of volunteer computing to perform units +of computational work is an appealing approach that obviates the +need to directly acquire sufficient hardware for the processing +demands. However, the fully automated nature of the approach is +not a natural fit for V-FASTRʼs manual review requirement. +2.4.2. Galaxy Zoo +Galaxy Zoo4 is an Internet-based project that relies on the +help of volunteers to classify a very large database of galaxy +images recorded by either the Sloan Digital Sky Survey or the +Hubble telescope. Users are asked to classify galaxies based on +shape, color, and direction of rotation, and report on possible +unidentified features. +1 Apache OODT: http://oodt.apache.org/ +2 XML-RPC: http://xmlrpc.scripting.com/spec.html +3 Apache Software Foundation: http://apache.org/ +4 Galaxy Zoo: http://www.galaxyzoo.org/
The rationale behind human intervention +is that manual classification is more accurate and insightful +than any analysis that can currently be undertaken by an +automatic program. To date, the project has met with success +that far exceeded expectations: more than 250,000 volunteers +have helped classify millions of images, resulting in the +confirmation of important scientific hypotheses, the formulation +of new ones, and the discovery of interesting new objects. +While Galaxy Zooʼs tactic of appealing to volunteers to +mitigate the challenge of image classification at scale is +attractive, the paradigm does not translate well to the V-FASTR +setting due to differences in the nature of the archives between +the two projects. Whereas Galaxy Zoo permits its volunteer +reviewers to leisurely peruse and mine a largely static image +archive, the rapidly growing data volumes associated with +ongoing V-FASTR observations dictate that reviews must be +regularly scheduled to keep the project within its resource +limits. +2.4.3. Foldit: The Protein Folding Game +Foldit (Cooper et al. 2010) is a collaborative online protein +folding game developed by the Center for Game Science +at the University of Washington, and it represents a +“crowd-sourced” attempt to solve the computationally challenging +task of predicting protein structure. Proteins, chains of +amino acids, play a key role in a wide range of human diseases, +but comparatively little is known about how they contort +themselves into the specific shapes that determine their +function. Because of the scale and complexity of the challenge, +the researchers behind Foldit have turned to the puzzle-solving +capabilities of human beings for assistance. After learning the +rules on simple challenges, players compete against one +another to design alternative protein structures, with the goal +of arriving at an arrangement that minimizes the total energy +needed to maintain the shape. +Foldit has created an environment in which the unknown and +diverse strategies of its human participants become a core +strength. Furthermore, by presenting the scientific activity as a +competitive game, the project, which currently boasts over +400,000 players, has shown that it is possible to recruit and +leverage human processing power at scale. This provides an +interesting model for other projects, including V-FASTR, +which at some point may rely upon a human element to +augment or improve automated processes. +3. IMPLEMENTATION +In this section we provide details on the implementation of +our metadata-driven framework for online review of V-FASTR +candidate detection events. We describe our methodology and +the considerations that informed our design, followed by a +presentation of the system architecture. +3.1. Development Methodology +Several factors influenced the development process and have +left their imprint on the final architecture. We feel that our +implementation is uniquely suited to the needs of the V-FASTR +project precisely because these factors were identified +early on and were thus able to influence all aspects of the +design process. +3.1.1. Collaboration +As described in Section 2, our group has developed +substantial experience in the design and implementation of +data systems for a broad range of scientific domains.
In each +case, a close working relationship with members of the project +science team was an essential ingredient to the success of the +project, and our experience developing an online candidate +review framework for V-FASTR was no different. As software +engineers familiar with the challenges inherent in scientific data +management, our intuitions about the technical challenges of +the system served us well in scoping out the project timeline. +However, it was our early and regular communication with +members of the V-FASTR science team that was critical to +obtaining the domain knowledge necessary to make accurate +assumptions, and in the early identification of issues. The +current system architecture, covering both the back and front +end elements, is a direct result of an ongoing feedback loop +between the science and software teams. +3.1.2. Constraints +As mentioned in Section 2, V-FASTR is a commensal +experiment that scans for fast transients in data that is already +being collected as part of the regular third-party use of the +VLBA instrument. As such, the experiment maintains a “guest” +status on the NRAO computing infrastructure. Consequently, +care must consistently be taken not to overtax NRAO system +resources, including disk storage, CPU time, and network +bandwidth. These physical constraints motivated many of the +architectural decisions described in the following sections. +Each V-FASTR data product may contain hundreds of files, +rooted at a top-level job directory, and includes two types of +products: filterbank data (up to ~100 GB per job) and +baseband voltage data (up to ~10 GB per job). The total data +storage capacity available to V-FASTR is just ~8 TB, enough +to contain ~800 jobs of ~10 GB each (on average). Because +products are produced at an average rate of ~10–20 per day (but +sometimes in the hundreds), the storage would be exhausted +within weeks unless products are periodically reviewed +by the science team analysts: at the average rate of ~15 products +of ~10 GB per day, the ~8 TB budget fills in roughly 50 days, +and burst days with hundreds of products shorten that window +considerably. During review, each candidate is +either flagged for higher-resolution processing (and saved) or +discarded as a false positive and the disk space reclaimed (see +Figure 1 for an overview of the average data volumes per job at +different processing stages). The desire to provide analysts with +a streamlined method for this review process is at the very core +of our design. +Similarly, the network bandwidth constraints of the host led +us to a data transfer configuration that focused on metadata +rather than requiring the complete transfer of raw, unreviewed, +and possibly spurious detection data over the Internet. Instead, +metadata sufficient to describe the salient characteristics of a +candidate event to a trained analyst was transferred into our +candidate review framework. This careful selection process had +the beneficial side effect of greatly limiting the size of the +transferred products, allowing for a considerably longer +retention period on the ~10 TB archive hosted at JPL. +Finally, security constraints were also critically important to +the design, particularly because the system spans two separate +security domains: NRAO and JPL. To comply with the security +requirements of the host system, data transfer was configured +on the NRAO system to allow read-only operations and was +made accessible only to clients originating from the JPL +domain.
Furthermore, on the front-end, the functionality +exposed by the web portal component interacted only with +the local metadata archive, eliminating the possibility of +corruption or inappropriate access to the raw observational +data. +3.2. Architecture +As previously mentioned, the candidate review framework is +driven by metadata describing the candidate events to be +reviewed by V-FASTR analysts. To communicate this data +from the raw source repository at the NRAO to an analyst using +a browser anywhere in the world, we developed a software +framework consisting of two principal components: a metadata +pipeline that manages the capture, transfer, and storage of +metadata annotations, and a web portal which provides analysts +with a convenient, context-rich environment for efficiently +classifying candidate events. +3.2.1. Metadata Pipeline +On the JPL side, the V-FASTR data products are processed +through a metadata extraction and data archiving pipeline that +eventually leads to the event candidates being available for +inspection on the web portal. The pipeline is composed of three +major software components: rsync, the OODT CAS Crawler, +and the OODT File Manager, depicted in Figure 2. +rsync. Data products are automatically transferred from the +NRAO staging area to the JPL server using rsync. rsync is a +popular application and data transfer protocol that allows the +content of a directory tree to be synchronized between two +servers with minimal human intervention. It was chosen +because of its simplicity, high performance, reliability, and +wide range of configuration options. Through rsync, files are +transferred in compressed format and using delta encoding, +meaning that only the file differences are sent through +subsequent transfers. For this project, an rsync server +daemon was set up on the NRAO side to expose the data +staging area where the products are collected. For security +reasons, the daemon was restricted to allow read-only +operations to clients originating from a designated JPL IP +address. On the JPL side, an rsync client was set up to run +hourly as a system cron job, transferring products to the JPL +archive area. To minimize bandwidth usage, the client only +transfers a very small subset of the data comprising a product +directory tree, namely the detection images and the output +and calibration files containing the metadata needed by the +web portal (see the configuration sketch below). On average, +this represents a reduction of the +data product size by a factor of 3.5 × 10^3: from an average +size of ~35 GB on the NRAO server (for a product with +several detections), to ~10 MB on the JPL server. The rsync +data transfer rates between the two servers were measured to +be around ~2 MB/s, more than enough to transfer between +10 and 20 data products per day. +CAS Crawler. Once the data products are transferred to the +JPL server, they are automatically detected by the OODT +CAS Crawler daemon, which runs at sub-hour time intervals +to pick up new products as soon as they become available. +The Crawler is responsible for notifying the OODT File +Manager and therefore starting the product ingestion process. +For this deployment, the Crawler was configured to send a +signal only if two preconditions are both satisfied: (1) a +similarly named product does not already exist in the File +Manager catalog and (2) the product directory contains a +special marker file indicating that the product has been +processed by the mail program, and therefore is in a complete +state (i.e., no files are missing); a sketch of this gate follows +below.
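+To make the transfer setup concrete, the following is a minimal configuration sketch. It is illustrative only: the module name, paths, schedule, and client address are hypothetical, and the output/calibration file patterns (*.out, *.cal) are assumptions; only the -det.jpg and -dedisp.jpg image suffixes are taken from the product description in the next paragraph.
+An rsyncd.conf module on the NRAO side, exported read-only to a single JPL client:
+[vfastr]
+    path = /data/vfastr/staging
+    read only = yes
+    hosts allow = 192.0.2.10
+    comment = V-FASTR product staging area
+An hourly cron entry on the JPL side, pulling only the images and metadata-bearing files while excluding everything else:
+0 * * * * rsync -azm --include='*/' --include='*-det.jpg' --include='*-dedisp.jpg' --include='*.out' --include='*.cal' --exclude='*' nrao.example.org::vfastr/ /jpl/archive/vfastr/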
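+The two-precondition gate itself can be expressed in a few lines. The Java sketch below is illustrative pseudologic only, not the actual OODT crawler precondition API; the marker-file name and the catalog-lookup helper are assumptions.
+import java.io.File;
+/** Illustrative sketch of the two-precondition ingest gate described above. */
+public class IngestGate {
+    /** Hypothetical catalog lookup: true if a product with this name was already ingested. */
+    interface CatalogClient {
+        boolean hasProduct(String productName);
+    }
+    private final CatalogClient catalog;
+    public IngestGate(CatalogClient catalog) {
+        this.catalog = catalog;
+    }
+    /** Notify the File Manager only if the product is both new and complete. */
+    public boolean shouldIngest(File productDir) {
+        boolean alreadyCataloged = catalog.hasProduct(productDir.getName());
+        // Marker file written once all files are present; the file name is an assumption.
+        boolean complete = new File(productDir, "INGEST_READY").exists();
+        return !alreadyCataloged && complete;
+    }
+    public static void main(String[] args) {
+        IngestGate gate = new IngestGate(name -> false); // stub: empty catalog
+        System.out.println(gate.shouldIngest(new File("/tmp/job-001")));
+    }
+}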
+CAS File Manager. The OODT CAS File Manager is a +customizable software component that is responsible for +processing and archiving a data product, making it available +for query and access to clients. For this project, the File +Manager was deployed with the default Apache Lucene +metadata back-end, and configured to archive products +in-place, i.e., without moving them to a separate archive +directory, as otherwise the rsync process would transfer them +again from the NRAO server. Additionally, we leveraged the +extensibility of the OODT framework by configuring the File +Manager with custom metadata extractors that were +purposely written to parse the information contained in the +V-FASTR output and calibration files. Information is +extracted at the three levels that comprise the hierarchy of +a V-FASTR data product: job, scan, and event. Additionally, +a numerical algorithm was written to assign each pair of +available images (-det.jpg and -dedisp.jpg) to the event that +generated them. +Figure 1. Depiction of the full V-FASTR data flow with volume estimates (per job) at each stage. The candidate review framework (both metadata pipeline and web +portal components) interacts with the metadata and derived products repository at the intersection of A and B above. +In general, a File Manager can store metadata in its back-end +catalog as different object types. Each object type is defined to +contain multiple metadata fields, where each field is composed +of a named key associated with one or more string values. For this +project, the decision was made to maintain a one-to-one +correspondence between a data product and the corresponding +metadata ingested into the catalog. So rather than defining three +object types for jobs, scans, and events, a single object type +was used, holding all information for a data product in a single +container, with dynamically named keys that are encoded to +contain the scan and event numbers (see the sketch below for +one possible encoding). This decision was +motivated by the desire to simplify and optimize the querying +of information by the web portal client, since all metadata for a +product is retrieved through a single request to the File +Manager. As a consequence, the default Apache Lucene +metadata catalog implementation had to be slightly modified +to allow for the ingestion of dynamically named metadata +fields. +3.2.2. Web Portal +The second major component of the candidate review +framework is an interactive web portal. The primary purpose +of the portal is to provide a convenient online environment for +the location-independent perusal and assessment of potential +candidates in context. The portal provides V-FASTR analysts +with the ability to quickly navigate through the available +information to identify candidates worthy of further inspection +on a familiar web platform. +The portal has been implemented as a PHP web application +using the Apache OODT Balance web framework running on +top of the Apache HTTPD Web Server. OODT Balance was +chosen here for its ability to easily integrate with the OODT +components in the back-end metadata pipeline, namely the +OODT CAS File Manager described earlier. Furthermore, the +flexible, modular approach of the framework allowed us to +quickly connect the web portal to the metadata repository and +rapidly begin constructing the necessary views specific to the +V-FASTR candidate review and validation use cases.
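+Returning to the File Manager decision above: as an illustration of the single-object-type scheme, the Java sketch below shows one way dynamically named keys could encode the scan and event numbers. The text does not give the exact key syntax used in the deployment, so the scan<n>.event<m>.<field> pattern and the field name here are assumptions.
+import java.util.HashMap;
+import java.util.Map;
+/** Illustrative flat, per-product metadata container with dynamically named keys. */
+public class ProductMetadata {
+    private final Map<String, String> fields = new HashMap<>();
+    /** Encode the scan and event numbers into the key, e.g., "scan2.event13.snr". */
+    public void put(int scan, int event, String field, String value) {
+        fields.put("scan" + scan + ".event" + event + "." + field, value);
+    }
+    public String get(int scan, int event, String field) {
+        return fields.get("scan" + scan + ".event" + event + "." + field);
+    }
+    public static void main(String[] args) {
+        ProductMetadata md = new ProductMetadata();
+        md.put(2, 13, "snr", "9.7");
+        // One flat container per product: a single request can return every scan/event field.
+        System.out.println(md.get(2, 13, "snr"));
+    }
+}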
+As Figure 3 shows, the web portal offers a variety of views +of the available metadata which are hierarchically organized to +match the conceptual relationships in the data. At the highest +level, a job or run might consist of multiple scans, each of +which may itself contain multiple detection event candidates. +This hierarchy, expressed in the metadata, is preserved in the +layout of the portal views, and breadcrumb navigation is +provided to facilitate orientation within the nested structure. +At the level of an individual event candidate (Figure 3, +middle image), two graphical representations of the event are +available to assist analysts in classifying the nature of the +signal. These images are generated automatically as part of the +initial candidate identification process (Wayth et al. 2011), and +they provide a trained analyst the necessary structural clues +needed to rapidly assess the received signal as being genuinely +astronomical in origin or merely a product of RFI. +To support both metadata browsing in context and the desire +for an analyst to be able to rapidly peruse the image +representations of an entire job (many events in many scans) +at once, a compromise was struck whereby, for each job, a +portal user may select either a traditional, hierarchical navigation or a +flattened view in which all of the (possibly numerous) event +candidates are presented simultaneously on screen and can be +accessed serially simply by scrolling the view. +Figure 2. Component diagram for the metadata pipeline component of the V-FASTR +candidate review framework. +Together, the metadata pipeline and the web portal constitute +an end-to-end framework for capturing, archiving, and +presenting metadata about detected transient event candidates +to V-FASTR scientists. Furthermore, by providing a reliable +process and flexible interface, the system directly streamlines +the analysis process, boosting the overall efficiency of the +project. +4. EVALUATION +As we have described in the previous section, the candidate +review framework embraces the model of online collaborative +validation of fast transient candidates by a team of geographically +dispersed analysts, and improves the efficiency with +which analysts may classify observational data. In this section +we describe the early results of our experience with the +operational deployment of the framework, as well as highlight +several areas for the evolution of the tool to further enhance its +utility. +4.1. Experience +The initial deployment of the collaborative review framework +for operational use by the V-FASTR science team was +made in early summer 2012. The immediate feedback was +largely positive: analysts praised the capabilities of the system, +the general improved accessibility afforded by a web-based +user interface, and the newfound capability to easily navigate +rapidly through all detections in a given job, or peruse the +different levels (scans and events) within a job individually. +The biggest initial complaint with the system was that too +many mouse clicks were required to complete an analysis of all +of the candidates in an entire job. +A consequence of the iterative feedback loop that developed +between the software and science teams (described further in +Section 3) was that suggestions for improvements were +repeatedly made, tracked, and acted upon.
This process resulted +in an updated release occurring approximately every two weeks +during the first few months of the deployment. Suggestions for +improvements included the addition of various metadata fields +identified as critical to the classification task, updates to the +visual organization of the elements of the web portal views, and +a relentless focus on reducing the number of mouse clicks +required on the part of analyst users. +At the time of this writing, the V-FASTR portal has been +running operationally for several weeks, and we can draw some +early conclusions on the usefulness of the system. Overall, as +reported by the science team, the project appears to have +accomplished its broad goal of facilitating the +collaborative task of inspecting and screening radio-transient +events. By extracting all relevant metadata from the latest data +products, and presenting it on the web portal in a concise +fashion, scientists can now execute their tasks more efficiently, +compared to earlier times when they had to log onto a terminal +and analyze the raw data manually. Additionally, the online +availability of all data and metadata through a browser interface +(as opposed to an ssh terminal) has allowed for greater +flexibility with regard to when and where evaluations can be +performed, including for the first time on a mobile device. +4.2. Evolution +On the whole, the ability to interact with the totality of the +candidate data and metadata through a browser interface has +greatly expanded the analysts’ ability to perform their tasks +with greater flexibility regarding when and where evaluations +can be performed. This includes, for the first time, anecdotal +accounts of an analyst reviewing candidates from a mobile +device. +With this freedom, in turn, has come a number of feature +requests which can be taken together to form a roadmap of +sorts for the evolution of the framework. Now that the +interaction with candidate metadata has transitioned to the +browser, the science team has identified three key features they +feel would complete the transition and entirely replace the prior +ad-hoc methods for coordinating the analysts’ activities: +Job assignment. As mentioned in Section 3, the timely +review of detection candidates is critical to remaining within +the resource constraints imposed upon the experiment. At the +moment, review jobs are assigned to analysts via email. +Augmenting the web portal with the ability to identify an +individual analyst would enable the presentation of +a prioritized list of that analystʼs outstanding review tasks. +Effort Tracking. Along the same lines, it is important to +spread the analysis load evenly across the science team, since +no one person is performing the analysis as his or her full-time +job. Augmenting the framework with the ability to track +the analysis contributions of individual users over time +would assist in the equitable scheduling of future +review jobs. +In-browser archiving.
When an analyst determines a +candidate event merits high-resolution follow-up, the last +step is to archive the associated raw data so that it can be +evaluated at a later date. Currently, due to the security +restrictions permitting only read-only access for external connections +to the archive at the NRAO (described in Section 3), +this process is handled out-of-band by the analyst logging +into an NRAO machine and archiving the appropriate data +manually. It is possible that, with the identity management +features discussed in the previous two items (and the +associated auditing capabilities they could entail), the +restrictions might be negotiated to the point that certain +defined activities (such as archiving a single job directory) +could be initiated from within the portal environment. +Figure 3. Screenshots of the initial version of the web portal component. From left to right: the portal home page displaying recent jobs and associated event counts, +image metadata associated with an individual event candidate, and the full metadata listing, including associated scans, for an observation job. +5. CONCLUSION +V-FASTR, and commensal operations more generally, are +particularly challenging experiments due to extreme data +volume and real-time requirements. Processing occurs continually, +and the data flow must be coordinated across multiple +physical locations with transport mechanisms including +FedEx shipment (disks from the antennas), high-bandwidth +interconnects (the correlator and transient detection systems), +daily rsync over IP (the ska-dc mirror), and distributed WWW +protocols (manual review, which takes place by analysts on +three continents). Various components of the system operate on +millisecond, hourly, and daily clocks, and all components must +continue operating since there is very little margin for buffer +resources. In addition, the data processing components are +highly heterogeneous, with human experts playing their own +role as scheduled pattern recognition engines in the overall +architecture. By facilitating timely review, and reducing the +learning curve for new reviewers, the V-FASTR portal will +play a critical role in keeping the data flowing and making the +system sustainable in the long term. +This effort was supported by the Jet Propulsion Laboratory, +managed by the California Institute of Technology under a +contract with the National Aeronautics and Space +Administration. +REFERENCES +Cooper, S., Khatib, F., & Treuille, A. 2010, Natur, 466, 756–60 +Cordes, J., Lazio, T., & McLaughlin, M. 2004, NewAR, 48, 1459–72 +Crichton, D., Kincaid, H., Downing, G., Srivastava, S., & Hughes, J. S. 2001, +in Proc. of the 14th IEEE Symp. on Computer-Based Medical Systems, An +Interoperable Data Architecture for Data Exchange in a Biomedical +Research Network (Piscataway, NJ: IEEE), 65–72 +Crichton, D., Mattmann, C., Hart, A., et al. 2011, in Proc. of the 24th +IEEE Symp. on Computer-Based Medical Systems, An Informatics +Architecture for the Virtual Pediatric Intensive Care Unit (Piscataway, +NJ: IEEE), 1–6 +Hart, A., Goodale, C., Mattmann, C., et al. 2011, in Proc. of the 2nd Int. +Workshop on Software Engineering for Cloud Computing, A Cloud-enabled +Regional Climate Model Evaluation System (New York: ACM), 43–49 +Mattmann, C., Crichton, D., Medvidovic, N., & Hughes, J. S. 2006, in Proc. +2006 Int. Conf. on Software Engineering, A Software Architecture-based +Framework for Highly Distributed and Data Intensive Scientific +Applications (New York: ACM), 721–30 +Mattmann, C., Freeborn, D., Crichton, D., et al. 2009, in Proc. IEEE Int. Conf. +on Space Mission Challenges for Information Technology, A Reusable +Process Control System Framework for the Orbiting Carbon Observatory +and NPP Sounder PEATE Missions (Piscataway, NJ: IEEE), 165–72 +Romney, J. D.
2010, NRAO, http://www.vlba.nrao.edu/astro/obstatus/current/obssum.html +Siemion, A., von Korff, J., McMahon, P., Korpela, E., & Werthimer, D. 2010, +AcAau, 67, 1342–9 +Thompson, D., Wagstaff, K., Brisken, W., et al. 2011, ApJ, 735, 98 +Tran, J., Cinquini, L., Mattmann, C., et al. 2011, in Proc. of the 2nd Int. +Workshop on Software Engineering for Cloud Computing, Evaluating Cloud +Computing in the NASA DESDynI Ground Data System (New York: ACM), 36–42 +Wayth, R., Brisken, W., Deller, A., et al. 2011, ApJ, 735, 97 +Wayth, R., Tingay, S., & Deller, A. 2012, ApJL, 753, L36 \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY.txt new file mode 100644 index 0000000000000000000000000000000000000000..c92ae4773b74b21b9aa7b209b085a9f0f86cb88b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE STREAMS THE V-FASTR EXPERIMENT AS A CASE STUDY.txt @@ -0,0 +1,637 @@ +A FRAMEWORK FOR COLLABORATIVE REVIEW OF CANDIDATE EVENTS IN HIGH DATA RATE +STREAMS: THE V-FASTR EXPERIMENT AS A CASE STUDY +Andrew F. Hart, Luca Cinquini, Shakeh E. Khudikyan, David R. Thompson, +Chris A. Mattmann, Kiri Wagstaff, Joseph Lazio, and Dayton Jones +Jet Propulsion Laboratory, California Institute of Technology, Pasadena, CA 91109, USA; andrew.f.hart@jpl.nasa.gov +Received 2014 March 24; accepted 2014 August 10; published 2014 December 16 +ABSTRACT +“Fast radio transients” are defined here as bright millisecond pulses of radio-frequency energy. These short-duration +pulses can be produced by known objects such as pulsars or potentially by more exotic objects such as +evaporating black holes. The identification and verification of such an event would be of great scientific value. This +is one major goal of the Very Long Baseline Array (VLBA) Fast Transient Experiment (V-FASTR), a software-based +detection system installed at the VLBA. V-FASTR uses a “commensal” (piggy-back) approach, analyzing +all array data continually during routine VLBA observations and identifying candidate fast transient events. Raw +data can be stored from a buffer memory, which enables a comprehensive off-line analysis. This is invaluable for +validating the astrophysical origin of any detection. Candidates discovered by the automatic system must be +reviewed each day by analysts to identify any promising signals that warrant a more in-depth investigation. To +support the timely analysis of fast transient detection candidates by V-FASTR scientists, we have developed a +metadata-driven, collaborative candidate review framework. The framework consists of a software pipeline for +metadata processing composed of both open source software components and project-specific code written +expressly to extract and catalog metadata from the incoming V-FASTR data products, and a web-based data portal +that facilitates browsing and inspection of the available metadata for candidate events extracted from the VLBA +radio data. +Key words: catalogs – methods: data analysis – pulsars: general – radio continuum: general
INTRODUCTION +One of the current primary goals of radio astronomy is to +explore and understand the “dynamic radio sky” (Cordes +et al. 2004). In contrast to generating catalogs of known +sources, this scientific thrust focuses on transient events, or +transient signals generated by persistent yet time-varying +sources. We do not yet fully understand the scope and +distribution of different transient sources, which range from +the known (e.g., active galactic nuclei, brown dwarfs, flare +stars, X-ray binaries, supernovae, gamma-ray bursts) to the +probable (e.g., exoplanets), to the possible (e.g., ET +civilizations, annihilating black holes). As noted by Cordes +et al. (2004, p.14), “most exciting would be the discovery of +new classes of sources” (italics in original). Radio telescopes +continue to increase their data collecting abilities, observing +the sky with progressively finer time resolution. Of current +particular interest is the detection and characterization of +“fast radio transients,” which last for only small fractions of a +second. +The V-FASTR experiment (Wayth et al. 2011) is one of a +new breed of radio astronomy experiments specifically +targeting fast transient radio signals. The experiment is +conducted in a fully commensal (passive) fashion, searching +for signals in the data gathered during the regular processing +activities of its host instrument. Unlike more traditional, +single-telescope observations, however, the V-FASTR +experiment simultaneously utilizes anywhere between 2 and +10 telescopes of the National Radio Astronomy Observatory +ʼs (NRAO) Very Long Baseline Array (VLBA) (Romney +2010). The VLBA consists of 10 25 m telescopes that are +positioned geographically such that no 2 are within each +otherʼs local horizon, and the V-FASTR experiment +leverages this configuration to better discriminate between +instances of terrestrial Radio Frequency Interference (RFI) +and potentially genuine astronomical pulses (Thompson +et al. 2011). +The huge volumes of raw time-series voltage data generated +by the VLBA in the course of its operation make storing the +full record of an entire observing session infeasible at the +present time. As a consequence, considerable effort has been +devoted to developing and fine-tuning algorithms for the realtime +identification of potentially interesting signals in the noisy +and often incomplete data (Thompson et al. 2011; Wayth et al. +2012). All data selected by the real-time processing step is +subsequently inspected, on a daily basis, by members of the +geographically distributed V-FASTR science team and either +discarded as spurious or archived offline for full analysis at a +later date. +The V-FASTR experiment must therefore operate within +several important resource constraints: the inability to archive +the full observational record due to space constraints, and a +practical workload constraint upon the human analysts +reviewing candidate detections. To address the latter, we have +developed a metadata-driven, collaborative candidate review +framework for the V-FASTR experiment. The framework +comprises a set of software components dedicated to the +automatic capture and organization of metadata describing the +candidate events identified as interesting by the automated +algorithms, and an online environment for the collaborative +perusal and inspection of related imagery data by the V-FASTR +analysis team. +The rest of this paper describes the system as follows. 
In +Section 2 we describe our project in a more general context. +Section 3 presents the methodology and an architectural +description of the system. We follow with an evaluation of +The Astronomical Journal, 149:23 (7pp), 2015 January doi:10.1088/0004-6256/149/1/23 +© 2015. The American Astronomical Society. All rights reserved. +1 +our experience deploying the framework in Section 4, and +offer conclusions and future directions for the work in +Section 5. +2. BACKGROUND +To better understand the context of the system implementation +presented in Section 3, we first briefly introduce the VFASTR +experiment and describe the development of scientific +data systems at the NASA Jet Propulsion Laboratory (JPL). +We then describe the Object Oriented Data Technology +(OODT) project, an open source information integration +platform that plays a central role in our framework. Finally, +we briefly touch upon several related efforts at developing +online tools to collaboratively classify and validate scientific +observations. +2.1. V-FASTR: The VLBA Fast TRansients Experiment +V-FASTR (VLBA Fast TRansients) is a data analysis +system used by the VLBA to detect candidate fast transient +events. Principal investigators submit observing proposals to +the VLBA targeted at galaxies, supernovae, quasars, pulsars, +and more. V-FASTR analyzes all data collected by the VLBA +as part of routine processing and produces a nightly list of +candidates identified within the data processed that day. The +raw data for each candidate is temporarily saved in case it is +needed to interpret or follow up on a particularly promising or +unusual detection. However, the raw data consumes significant +disk space and therefore the candidate list must be reviewed on +a timely basis by experts. False positives can be deleted and +their disk space reclaimed, while truly interesting events can be +re-processed to enable the generation of a sky image to localize +the source of the signal. Software tools that streamline and +simplify this review process are therefore highly valued by +candidate reviewers and can have a positive impact on other +similar efforts throughout the world. +2.2. Data System Development at JPL +The Data Management Systems and Technologies group at +the JPL develops software ground data systems to support +NASA science missions. These pipelines are specifically +optimized to support the data-intensive and computationallyintensive +processing steps often needed to convert raw remotesensing +observations into higher level data products at scale so +that they can be interpreted by the scientists. The process +almost always involves developing a close collaboration with +project scientists to obtain an understanding of the processing +algorithms involved, a sense of the scale and throughput +requirements, and other operational constraints of the expected +production environment. +Over the years the group has developed a diverse portfolio of +data system experience across a broad spectrum of domains +including earth and climate science (Mattmann et al. 2009; +Hart et al. 2011; Tran et al. 2011), planetary science, +astrophysics, snow hydrology, radio astronomy, cancer +research (Crichton et al. 2001), and pediatric intensive care +(Crichton et al. 2011). +2.3. 
Open Source and OODT +One of the products of this long track record of experience in +the realm of scientific data processing systems is a suite of +software components known as OODT1 originally arose out of +a desire on the part of NASAʼs Office of Space Science to +improve the return on investment for individual mission data +systems by leveraging commonalities in their design to create a +reusable platform of configurable components, on top of which +mission-specific customizations could be made. OODT thus +represents both an architecture and a reference implementation. +Its components communicate with one another over standard, +open protocols such as XML-RPC2 and can be used either +individually, or coupled together to form more complex data +processing pipelines. +In 2009 OODT began the transition from a JPL-internal +development project to a free and open source software project +at the Apache Software Foundation (ASF).3 Graduating to a +top-level project in 2011, OODT has since undergone several +public releases at the ASF and is in use by a varied group of +scientific and commercial endeavors. As we will describe +further in Section 3, several OODT components form the core +platform of our candidate validation framework. The ready +availability of OODT components under a liberal license, +combined with their substantial pedigree was appealing to our +project both for time and budgetary considerations. +2.4. Related Work +In the following section we identify several ongoing efforts +that also utilize online tools to assist in the collaborative review +and classification of scientific observations. +2.4.1. Astropulse +Astropulse is part of a series of sky surveys for radio pulses +being conducted by the Search for Extraterrestrial Intelligence +(SETI) at the University of Berkeley (Siemion et al. 2010). +The Astropulse project conducts a survey of the sky from the +Arecibo Observatory in Puerto Rico, searching for short +(microsecond) broadband radio frequency pulses. While +Astropulseʼs use of Areciboʼs enormous single dish telescope +affords excellent sensitivity, V-FASTRʼs ability to perform +continent-scale baseline interferometery yields much greater +positional accuracy when attempting to localize the source of a +signal. +As a variant of the SETI@home project, Astropulse utilizes the +same distributed, collaborative volunteer computing infrastructure +accumulated over the years by that effort to perform a +number of computationally intense transformations and calculations +of the data in an attempt to better classify the origin of any +signals detected. The use of volunteer computing to perform units +of computational work is an appealing approach that obviates the +need to directly acquire sufficient hardware for the processing +demands. However, the fully automated nature of the approach is +not a natural fit for V-FASTRʼs manual review requirement. +2.4.2. Galaxy Zoo +GalaxyZoo4 is an Internet-based project that relies on the +help of volunteers to classify a very large database of galaxy +images recorded by either the Sloan Digital Sky Survey or the +Hubble telescope. Users are asked to classify galaxies based on +1 Apache OODT: http://oodt.apache.org/ +2 XML-RPC: http://xmlrpc.scripting.com/spec.html +3 http://apache.org/ +4 Galaxy Zoo: http://www.galaxyzoo.org/ +2 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. +shape, color and direction of rotation, and report on possible +unidentified features. 
The rationale behind human intervention +is that manual classification is more accurate and insightful +than any algorithm that can currently by undertaken by an +automatic program. To date, the project has met with success +that far exceeded expectations: more than 250,000 volunteers +have helped classify millions of images, resulting in the +confirmation of important scientific hypothesis, the formulation +of new ones, and the discovery of new interesting objects. +While Galaxy Zooʼs tactic of appealing to volunteers to +mitigate the challenge of image classification at scale is +attractive, the paradigm does not translate well to the V-FASTR +setting due to differences in the nature of the archives between +the two projects. Whereas Galaxy Zoo permits its volunteer +reviewers to leisurely peruse and mine a largely static image +archive, the rapidly growing data volumes associated with +ongoing V-FASTR observations dictate that reviews must be +regularly scheduled to keep the project within its resource +limits. +2.4.3. Foldit: The Protein Folding Game +Foldit (Cooper et al. 2010) is a collaborative online protein +folding game developed by the Center for Game Science +at the University of Washington, and it represents a +“crowd-sourced” attempt to solve the computationally challenging +task of predicting protein structure. Proteins, chains of +amino acids, play a key role in a wide range of human diseases, +but comparatively little is known about how they contort +themselves into the specific shapes that determine their +function. Because of the scale and complexity of the challenge, +the researchers behind Foldit have turned to the puzzle-solving +capabilities of human beings for assistance. After learning the +rules on simple challenges, players compete against one +another to design alternative protein structures, with the goal +of arriving at an arrangement that minimizes the total energy +needed to maintain the shape. +Foldit has created an environment in which the unknown and +diverse strategies of its human participants become a core +strength. Furthermore, by presenting the scientific activity as a +competitive game, the project, which currently boasts over +400,000 players, has shown that it is possible to recruit and +leverage human processing power at scale. This provides an +interesting model for other projects, including V-FASTR, +which at some point may rely upon a human element to +augment or improve automated processes. +3. IMPLEMENTATION +In this section we provide details on the implementation of +our metadata-driven framework for online review of V-FASTR +candidate detection events. We describe our methodology and +the considerations that informed our design, followed by a +presentation of the system architecture. +3.1. Development Methodology +Several factors influenced the development process and have +left their imprint on the final architecture. We feel that our +implementation is uniquely suited to the needs of the VFASTR +project precisely because these factors were identified +early on and were thus able to influence all aspects of the +design process. +3.1.1. Collaboration +As described in Section 2, our group has developed +substantial experience in the design and implementation of +data systems for a broad range of scientific domains. 
In each +case, a close working relationship with members of the project +science team was an essential ingredient to the success of the +project, and our experience developing an online candidate +review framework for V-FASTR was no different. As software +engineers familiar with the challenges inherent in scientific data +management, our intuitions about the technical challenges of +the system served us well in scoping out the project timeline. +However, it was our early and regular communication with +members of the V-FASTR science team that was critical to +obtaining the domain knowledge necessary to make accurate +assumptions, and in the early identification of issues. The +current system architecture, covering both the back and front +end elements, is a direct result of an ongoing feedback loop +between the science and software teams. +3.1.2. Constraints +As mentioned in Section 2, V-FASTR is a commensal +experiment that scans for fast transients in data that is already +being collected as part of the regular third-party use of the +VLBA instrument. As such, the experiment maintains a “guest” +status on the NRAO computing infrastructure. Consequently, +care must consistently be taken not to overtax NRAO system +resources, including disk storage, CPU time, and network +bandwidth. These physical constraints motivated many of the +architectural decisions described in the following sections. +Each V-FASTR data product may contain hundreds of files, +rooted at a top-level job directory, and includes two types of +products: filterbank data (up to ~100 GB per job) and +baseband voltage data (up to ~10 GB per job). The total data +storage capacity available to V-FASTR is just ~8 TB, enough +to contain ~800 jobs of ~10 GB each (on average). Because +products are produced at a average rate of ~10–20 per day (but +sometimes in the hundreds), the storage would be exhausted +within a few weeks unless products are periodically reviewed +by the science team analysts. During review, each candidate is +either flagged for higher-resolution processing (and saved) or +discarded as a false positive and the disk space reclaimed (see +Figure 1 for an overview of the average data volumes per job at +different processing stages). The desire to provide analysts with +a streamlined method for this review process is at the very core +of our design. +Similarly, the network bandwidth constraints of the host led +us to a data transfer configuration that focused on metadata +rather than requiring the complete transfer of raw, unreviewed, +and possibly spurious detection data over the Internet. Instead, +metadata sufficient to describe the salient characteristics of a +candidate event to a trained analyst was transferred into our +candidate review framework. This careful selection process had +the beneficial side effect of greatly limiting the size of the +transferred products, allowing for a considerably longer +retention period on the ~10 TB archive hosted at JPL. +Finally, security constraints were also critically important to +the design, particularly because the system spans two separate +security domains: NRAO and JPL. To comply with the security +requirements of the host system, data transfer was configured +on the NRAO system to allow read-only operations and was +made accessible only to clients originating from the JPL +3 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. +domain. 
Furthermore, on the front-end, the functionality +exposed by the web portal component interacted only with +the local metadata archive, eliminating the possibility of +corruption or inappropriate access to the raw observational +data. +3.2. Architecture +As previously mentioned, the candidate review framework is +driven by metadata describing the candidate events to be +reviewed by V-FASTR analysts. To communicate this data +from the raw source repository at the NRAO to an analyst using +a browser anywhere in the world, we developed a software +framework consisting of two principal components: a metadata +pipeline that manages the capture, transfer, and storage of +metadata annotations, and a web portal which provides analysts +with a convenient, context-rich environment for efficiently +classifying candidate events. +3.2.1. Metadata Pipeline +On the JPL side, the V-FASTR data products are processed +through a metadata extraction and data archiving pipeline that +eventually leads to the event candidates being available for +inspection on the web portal. The pipeline is composed of three +major software components: rsync, the OODT CAS Crawler, +and the OODT File Manager, depicted in Figure 2. +rsync. Data products are automatically transferred from the +NRAO staging area to the JPL server using rsync. rsync is a +popular application and data transfer protocol that allows to +synchronize the content of a directory tree between two +servers with minimal human intervention. It was chosen +because of its simplicity, high performance, reliability, and +wide range of configuration options. Through rsync, files are +transferred in compressed format and using delta encoding, +meaning that only the file differences are sent through +subsequent transfers. For this project, an rsync server +daemon was set up on the NRAO side to expose the data +staging area where the products are collected. For security +reasons, the daemon was restricted to allow read-only +operations to clients originating from a designated JPL IP +address. On the JPL side, an rsync client was set up to run +hourly as a system cron job, transferring products to the JPL +archive area. To minimize bandwidth usage, the client only +transfers a very small subset of the data comprising a product +directory tree, namely the detection images and the output +and calibration files containing the metadata needed by the +web portal. On average, this represents a reduction of the +data product size by a factor of 3.5 ´ 103: from an average +size of ~35 GB on the NRAO server (for a product with +several detections), to ~10 MB on the JPL server. The rsync +data transfer rates between the two servers were measured to +be around ~2 MBs-1, more than enough to transfer between +10 and 20 data products per day. +CAS Crawler. Once the data products are transferred to the +JPL server, they are automatically detected by the OODT +CAS Crawler daemon, which runs at sub-hour time intervals +to pick up new products as soon as they become available. +The Crawler is responsible for notifying the OODT File +Manager and therefore starting the product ingestion process. +For this deployment, the Crawler was configured to send a +signal only if two preconditions are both satisfied: (1) a +similarly named product does not already exist in the File +Manager catalog and (2) the product directory contains a +special marker file indicating that the product has been +processed by the mail program, and therefore is in a complete +state (i.e., no files are missing). 
+CAS File Manager. The OODT CAS File Manager is a +customizable software component that is responsible for +processing and archiving a data product, making it available +for query and access to clients. For this project, the File +Manager was deployed with the default Apache Lucene +metadata back-end, and configured to archive products +Figure 1. Depiction of the full V-FASTR data flow with volume estimates (per job) at each stage. The candidate review framework (both metadata pipeline and web +portal components) interact with the metadata and derived products repository at the intersection of A and B above. +4 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. +in-place, i.e., without moving them to a separate archive +directory, otherwise the rsync process would transfer them +again from the NRAO server. Additionally, we leveraged the +extensibility of the OODT framework by configuring the File +Manager with custom metadata extractors that were +purposely written to parse the information contained in the +V-FASTR output and calibration files. Information is +extracted at the three levels that comprise the hierarchy of +a V-FASTR data product: job, scan, and event. Additionally, +a numerical algorithm was written to assign each pair of +available images (-det.jpg and -dedisp.jpg) to the event that +generated them. +In general, a File Manager can store metadata in its back-end +catalog as different object types. Each object type is defined to +contain multiple metadata fields, where each field is composed +of a named key associated to one or more string values. For this +project, the decision was made to maintain a one-to-one +correspondence between a data product and the corresponding +metadata ingested into the catalog. So rather than defining three +object types for jobs, scans, and events, a single object type +was used holding all information for a data product in a single +container, with dynamically named keys that are encoded to +contain the scan and event numbers. This decision was +motivated by the desire to simplify and optimize the querying +of information by the web portal client, since all metadata for a +product is retrieved through a single request to the File +Manager. As a consequence, the default Apache Lucene +metadata catalog implementation had to be slightly modified +to allow for the ingestion of dynamically named metadata +fields. +3.2.2. Web Portal +The second major component of the candidate review +framework is an interactive web portal. The primary purpose +of the portal is to provide a convenient online environment for +the location-independent perusal and assessment of potential +candidates in context. The portal provides V-FASTR analysts +with the ability to quickly navigate through the available +information to identify candidates worthy of further inspection +on a familiar web platform. +The portal has been implemented as a PHP web application +using the Apache OODT Balance web framework running on +top of the Apache HTTPD Web Server. OODT Balance was +chosen here for its ability to easily integrate with the OODT +components in the back-end metadata pipeline, namely the +OODT CAS File Manager described earlier. Furthermore, the +flexible, modular approach of the framework allowed us to +quickly connect the web portal to the metadata repository and +rapidly begin constructing the necessary views specific to the +V-FASTR candidate review and validation use cases. 
+As Figure 3 shows, the web portal offers a variety of views +of the available metadata which are hierarchically organized to +match the conceptual relationships in the data. At the highest +level, a job or run might consist of multiple scans, each of +which may itself contain multiple detection event candidates. +This hierarchy, expressed in the metadata, is preserved in the +layout of the portal views, and the breadcrumb navigation +provided to facilitate orientation within the nested structure. +At the level of an individual event candidate (Figure 3, +middle image), two graphical representations of the event are +available to assist analysts in classifying the nature of the +signal. These images are generated automatically as part of the +initial candidate identification process (Wayth et al. 2011), and +they provide a trained analyst the necessary structural clues +needed to rapidly assess the received signal as being genuinely +extraterrestrial in origin or merely a product of RFI. +To support both metadata browsing in context and the desire +for an analyst to be able to rapidly peruse the image +representations of an entire job (many events in many scans) +at once, a compromise was struck whereby, for each job, a +portal user may select a traditional, hierarchical navigation or a +flattened view in which all of the (possibly numerous) event +candidates are presented simultaneously on screen and can be +accessed serially simply by scrolling the view. +Figure 2. Component diagram for the metadata pipeline component of the VFASTR +candidate review framework. +5 +The Astronomical Journal, 149:23 (7pp), 2015 January Hart et al. +Together, the metadata pipeline and the web portal constitute +an end-to-end framework for capturing, archiving, and +presenting metadata about detected transient event candidates +to V-FASTR scientists. Furthermore, by providing a reliable +process and flexible interface, the system directly streamlines +the analysis process, boosting the overall efficiency of the +project. +4. EVALUATION +As we have described in the previous section, the candidate +review framework embraces the model of online collaborative +validation of fast transient candidates by a team of geographically +dispersed analysts, and improves the efficiency with +which analysts may classify observational data. In this section +we describe the early results of our experience with the +operational deployment of the framework, as well as highlight +several areas for the evolution of the tool to further enhance its +utility. +4.1. Experience +The initial deployment of the collaborative review framework +for operational use by the V-FASTR science team was +made in early summer 2012. The immediate feedback was +largely positive: analysts praised the capabilities of the system, +the general improved accessibility afforded by a web-based +user interface, and the newfound capability to easily navigate +rapidly through all detections in a given job, or peruse the +different levels (scans and events) within a job individually. +The biggest initial complaint with the system was that too +many mouse clicks were required to complete an analysis of all +of the candidates in an entire job. +A consequence of the iterative feedback loop that developed +between the software and science teams (described further in +Section 3) was that suggestions for improvements were +repeatedly made, tracked, and acted upon. 
This process resulted in an updated release approximately every two weeks during the first few months of the deployment. Suggestions for improvement included the addition of various metadata fields identified as critical to the classification task, updates to the visual organization of the elements of the web portal views, and a relentless focus on reducing the number of mouse clicks required on the part of analyst users.
+At the time of this writing, the V-FASTR portal has been running operationally for several weeks, and we can draw some early conclusions on the usefulness of the system. Overall, as reported by the science team, the project appears to have accomplished its broad goal of facilitating the collaborative task of inspecting and screening radio-transient events. By extracting all relevant metadata from the latest data products, and presenting it on the web portal in a concise fashion, scientists can now execute their tasks more efficiently, compared to earlier times when they had to log onto a terminal and analyze the raw data manually. Additionally, the online availability of all data and metadata through a browser interface (as opposed to an ssh terminal) has allowed for greater flexibility with regard to when and where evaluations can be performed, including for the first time on a mobile device.
+4.2. Evolution
+On the whole, the ability to interact with the totality of the candidate data and metadata through a browser interface has greatly expanded the analysts' ability to perform their tasks with greater flexibility regarding when and where evaluations can be performed. This includes, for the first time, anecdotal accounts of an analyst reviewing candidates from a mobile device.
+With this freedom, in turn, has come a number of feature requests which can be taken together to form a roadmap of sorts for the evolution of the framework. Now that the interaction with candidate metadata has transitioned to the browser, the science team has identified three key features they feel would complete the transition and entirely replace the prior ad hoc methods for coordinating the analysts' activities:
+Job assignment. As mentioned in Section 3, the timely review of detection candidates is critical to remaining within the resource constraints imposed upon the experiment. At the moment, review jobs are assigned to analysts via email. Augmenting the web portal with the ability to identify an individual analyst would enable the presentation of a prioritized list of that analyst's outstanding review tasks.
+Effort Tracking. Along the same lines, it is important to spread the analysis load evenly across the science team, since no one person is performing the analysis as his or her full-time job. Augmenting the framework with the ability to track the analysis contributions of individual users over time would assist in the equitable scheduling of future review jobs.
+[Figure 3. Screen shots of the initial version of the web portal component. From left to right: the portal home page displaying recent jobs and associated event counts, image metadata associated with an individual event candidate, and a full metadata listing, including associated scans, for an observation job.]
+In-browser archiving.
When an analyst determines that a candidate event merits high-resolution follow-up, the last step is to archive the associated raw data so that it can be evaluated at a later date. Currently, due to the security restrictions permitting read-only access for external connections to the archive at the NRAO (described in Section 3), this process is handled out-of-band by the analyst logging into an NRAO machine and archiving the appropriate data manually. It is possible that, with the identity management features discussed in the previous two items (and the associated auditing capabilities they could entail), the restrictions might be negotiated to the point that certain defined activities (such as archiving a single job directory) could be initiated from within the portal environment.
+5. CONCLUSION
+V-FASTR, and commensal operations more generally, are particularly challenging experiments due to extreme data volumes and real-time requirements. Processing occurs continually, and the data flow must be coordinated across multiple physical locations, with transport mechanisms ranging from FedEx shipment (disks from the antenna) and high-bandwidth interconnects (the correlator and transient detection systems) to daily rsync over IP (the ska-dc mirror) and distributed WWW protocols (manual review, which takes place by analysts on three continents). Various components of the system operate on millisecond, hourly, and daily clocks, and all components must continue operating since there is very little margin in buffer resources. In addition, the data processing components are highly heterogeneous, with human experts playing their own role as scheduled pattern recognition engines in the overall architecture. By facilitating timely review, and by reducing the learning curve for new reviewers, the V-FASTR portal will play a critical role in keeping the data flowing and making the system sustainable in the long term.
+This effort was supported by the Jet Propulsion Laboratory, managed by the California Institute of Technology under a contract with the National Aeronautics and Space Administration.
+REFERENCES
+Cooper, S., Khatib, F., & Treuille, A. 2010, Natur, 466, 756–60
+Cordes, J., Lazio, T., & McLaughlin, M. 2004, NewAR, 48, 1459–72
+Crichton, D., Kincaid, H., Downing, G., Srivastava, S., & Hughes, J. S. 2001, in Proc. of the 14th IEEE Symp. on Computer-Based Medical Systems, An Interoperable Data Architecture for Data Exchange in a Biomedical Research Network (Piscataway, NJ: IEEE), 65–72
+Crichton, D., Mattmann, C., Hart, A., et al. 2011, in Proc. of the 24th IEEE Symp. on Computer-Based Medical Systems, An Informatics Architecture for the Virtual Pediatric Intensive Care Unit (Piscataway, NJ: IEEE), 1–6
+Hart, A., Goodale, C., Mattmann, C., et al. 2011, in Proc. of the 2nd Int. Workshop on Software Engineering for Cloud Computing, A Cloud-enabled Regional Climate Model Evaluation System (New York: ACM), 43–49
+Mattmann, C., Crichton, D., Medvidovic, N., & Hughes, J. S. 2006, in Proc. 2006 Int. Conf. on Software Engineering, A Software Architecture-based Framework for Highly Distributed and Data Intensive Scientific Applications (New York: ACM), 721–30
+Mattmann, C., Freeborn, D., Crichton, D., et al. 2009, in Proc. IEEE Int. Conf. on Space Mission Challenges for Information Technology, A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE Missions (Piscataway, NJ: IEEE), 165–72
+Romney, J. D.
+2010, NRAO, http://www.vlba.nrao.edu/astro/obstatus/current/obssum.html
+Siemion, A., von Korff, J., McMahon, P., Korpela, E., & Werthimer, D. 2010, AcAau, 67, 1342–9
+Thompson, D., Wagstaff, K., Brisken, W., et al. 2011, ApJ, 735, 98
+Tran, J., Cinquini, L., Mattmann, C., et al. 2011, in Proc. of the 2nd Int. Workshop on Software Engineering for Cloud Computing, Evaluating Cloud Computing in the NASA DESDynI Ground Data System (New York: ACM), 36–42
+Wayth, R., Brisken, W., Deller, A., et al. 2011, ApJ, 735, 97
+Wayth, R., Tingay, S., & Deller, A. 2012, ApJL, 753, L36
\ No newline at end of file
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-relation.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de3ecb351d1cf7a834e774d043c5e3cc5845f0eb
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-relation.txt
@@ -0,0 +1,716 @@
+A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions
+DOI: 10.1109/SMC-IT.2009.27
+Chris A. Mattmann, Dana Freeborn, Dan Crichton, Brian Foster, Andrew Hart, David Woollard, Sean Hardman, Paul Ramirez, Sean Kelly, Albert Y. Chang, Charles E. Miller
+Jet Propulsion Laboratory
+California Institute of Technology
+Pasadena, CA 91109, USA
+mattmann@jpl.nasa.gov
+Abstract
+We describe a reusable architecture and implementation framework for managing science processing pipelines for mission ground data systems. Our system, dubbed "PCS", for Process Control System, improves upon an existing software component, the OODT Catalog and Archive (CAS), which has already supported the QuikSCAT, SeaWinds and AMT earth science missions. This paper focuses on PCS within the context of two current earth science missions: the Orbiting Carbon Observatory (OCO), and NPP Sounder PEATE projects.
+1 Introduction
+Data volume and computational needs for Earth science missions at NASA are growing by orders of magnitude.
+The low cost of disk storage space and the increasing power and pervasiveness of high performance computing have engendered an era in which previously unimaginable science questions can be answered in years rather than decades. These science questions range from the study of sea surface temperatures to observe maritime pollution, to measuring atmospheric chemical composition for weather forecasting, to obtaining a better understanding of the Earth's global carbon cycle and climate change as a whole.
+A significant portion of any space-based NASA earth science mission is a Ground Data System (GDS). The GDS is responsible for receiving raw spacecraft data as delivered from a ground station (a strategically placed data center on Earth with ample ground-to-space bandwidth and connectivity for receiving satellite data), and for processing the information through several focused series of steps with the goal of delivering the scientific value encoded in the data to interested scientists, both locally at an instrument team center, and then to universities, decision makers, and the broader science community. The processing that a GDS must perform ranges from mundane activities, including data (un-)marshalling (removal of special space "header" information) and subsetting, to more involved processes, including temporal and spatial positioning, calibration, and statistical analysis, to complex scientific assimilation, including prospective and retrospective physical modeling of a scene.
+Beginning with the Automated Multi-Mission Operations System (AMMOS) Multi-mission Ground Data System (MGDS) in the early 1990s, our work has focused on building reusable software components for GDS systems. As an example, the Central Data Base (CDB) Subsystem of the MGDS included database management software comprising metadata and file management, file transfer capabilities, user interfaces and data storage facilities to support multi-mission telemetry data streams for current and future planetary missions. This demanded that the CDB architecture adhere to the architectural principles of extensibility, scalability, and reusability. Because the CDB was and is part of a larger system that included controlled, centralized hardware, these architectural principles were satisfied for AMMOS by simply ensuring that the CDB was data and policy driven.
+Our ensuing work on the Alaska SAR Facility (ASF) and NASA Scatterometer (NSCAT) projects made clear two significant trends: 1) neither of these missions was part of the controlled, centralized system for which the CDB was developed, and 2) the data management requirements for these two missions were different from each other and from AMMOS. This meant that 1) hardware and platform choices could not be assumed and 2) additional capabilities not originally required for AMMOS had to be developed. In order to meet mission schedule and cost constraints, developers for each project independently employed a method we coined "rapid adaptation" of the original CDB software, which resulted in two very successful mission data systems with ultimately very few similarities or shared code.
+At the time the NSCAT follow-on mission (SeaWinds on ADEOS II) was ramping up, a technology task originally funded by the NASA Office of Space Science was focused on architecting and developing a common, standards-based software framework dubbed Object Oriented Data Technology (OODT) [12].
OODT provided "out of the box" core data management software services while remaining adaptable to address the (potentially evolving) requirements that are unique from mission to mission.
+Several authors of this paper, supporting SeaWinds and the OODT technology task, decided to collaborate to create a platform- and database-independent service for managing files and tasks. The result of this collaboration was the OODT Catalog and Archive Service component, which was architected to be reusable, reliable and scalable. The SeaWinds (on QuikSCAT and ADEOS II) and Advanced Communications Technology Satellite (ACTS) Mobile Terminal (AMT) projects benefited greatly from employing the CAS component to support their science data systems. QuikSCAT is in its 10th year of a planned 2-year mission and is continuing to function in a nearly lights-out mode. Hardware has been added to the system to support the unplanned data and processing volumes (reprocessing of 7 years of data completed in 6 months, simultaneous with daily operations) by simply changing the software configuration. No software engineers were required to extend the system.
+While the CAS component successfully supported SeaWinds and AMT, the JPL earth missions that followed, Orbiting Carbon Observatory (OCO) and NPP Sounder PEATE, needed to support far more complex processing (greatly increased data volumes and processing throughput) and various hardware and platform configurations. This forced us to rethink the CAS component implementation, which resulted in 1) the refactoring of the CAS component into two distinct components, the File Manager and the Workflow Manager, and 2) the development of a third component to provide a standard interface to various hardware and platform configurations, the Resource Manager.
+The refactoring of the CAS into the File Manager and the Workflow Manager components solved several issues. First, it decoupled the initiation of a workflow from the ingestion of a file. Therefore, while workflows can be initiated based on the ingestion of a particular file or file type, they can also be initiated based on other events such as a specific time of day, an operator request or a software request. Second, the refactoring provides developers and system designers the ability to utilize only the components they need. And third, the refactoring supports independent evolution of the components, and thus capabilities. The combination of these three refactored CAS components has come to be known as the Process Control System, or PCS.
+In addition to the File Manager, Workflow Manager and Resource Manager components that provide common reusable capabilities for file and metadata management, pipeline processing and job submission, we have also developed reusable interfaces to these components to provide additional commonly required capabilities for science data management systems. To support the automation of file ingestion, we have developed a configurable push-pull framework and crawler framework. To provide easy integration of science code in order to support all phases of algorithm development (testbed, operations and science computing facility), the PCS Task Wrapper has been developed.
+In this paper we will describe our core PCS components, their architecture, how they helped us solve problems on OCO and NPP Sounder PEATE, and how they are positioning us for the future of Earth science mission work.
We believe such work will necessitate the same spirit of architectural reuse, understanding and mission-specific adaptation that led to the genesis of the modern PCS and that will ultimately lead to its future evolution. We will argue in this paper that our PCS uniquely positions us in the state of the art in constructing large-scale, distributed, data-intensive GDS software for NASA Earth science missions.
+The rest of this paper is organized as follows. Section 2 provides further background and related efforts in the areas of grid computing, workflow systems and science data systems. Section 3 describes the core PCS architectural components in greater detail. Section 4 presents our experience leveraging the PCS on OCO and NPP Sounder PEATE. Section 5 rounds out the paper with conclusions and highlights our planned future work.
+2 Background and Related Work
+Since the development of the computational grid [8] as a means for the virtualization and sharing of processing and storage resources across organizational and geographic boundaries, many groups and organizations have recognized the power of the grid as an enabler of large-scale scientific research. In this section, we discuss ongoing software projects and research initiatives relevant to the PCS.
+2.1 Grid Systems
+The Globus toolkit [9], developed by The Globus Alliance, is a collection of open-source software tools for developing distributed computing systems and applications. The toolkit provides users with a suite of software components and libraries that can either be used individually or packaged together to implement the many aspects of a distributed, service-oriented infrastructure, including security, resource and data discovery, access, and management, and communication modules customized for a particular grid-based effort.
+2.2 Workflow Systems
+The past ten years have witnessed an explosion in the number of workflow languages and software systems developed to support scientific workflows. Yu and Buyya [15] attempted to taxonomize these scientific workflow systems, largely according to the underlying technologies with which they were built. In addition to this taxonomy, Woollard et al. presented a characterization of workflow systems based on the intended scientific use [14]. Specifically, the authors classified certain workflow systems as Production Systems, of which both the OCO and NPP Sounder PEATE ground data systems are examples.
+2.2.1 Condor
+Condor [11] is a grid-based job scheduling system developed at the University of Wisconsin–Madison which aims, among other things, to improve the effective usage of available distributed computing and storage resources by detecting and exploiting machine idle cycles. Condor provides mechanisms for job queuing, setting scheduling policies, and general resource management and monitoring. Condor insulates users from the details of the underlying infrastructure by transparently handling decisions about when and where jobs will be scheduled, monitoring their execution, and producing notifications of completion. While originally designed to operate in a workstation environment, a variant of Condor, Condor-G [10], leverages the Globus toolkit to provide a Condor implementation that is interoperable with Globus-based grids.
+2.2.2 Pegasus
+Pegasus [7] is similar to Condor in that it provides a layer of abstraction between the jobs to be processed and the hardware that they will eventually be processed on.
Developed at the USC Information Sciences Institute, Pegasus is capable of dynamically assigning computational workflows with multiple processing steps to a large number of grid-based compute nodes based on resource availability. In addition to generating an initial workflow mapping, Pegasus offers the ability to transparently remap a workflow, increasing the reliability of the system in the event of failure in a small number of compute nodes.
+2.3 Science Data Processing Systems
+Science Data Processing Systems provide the base level of service needed to effectively manage the vast quantities of intermediate and final data products generated by large-scale, computationally intensive research tasks. While there are a large number of systems in operation, we focus our discussion on those which provide services distinctly similar to the PCS.
+2.3.1 S4PA
+The Simple, Scalable, Script-based Science Product Archive (S4PA) [3] is a storage architecture developed and deployed at NASA's Goddard Space Flight Center in support of the operation of the Goddard Earth Science Data and Information Services Center (GES DISC). As cost was a primary factor in the development of S4PA, the developers have taken pains to streamline the system. Hosting the primary copy of all data online reduced the need for costly physical media distribution, and utilizing the UNIX directory structure, in combination with metadata-encoded filenames, provides a simplified mechanism for archive and retrieval.
+As its name implies, the S4PA is primarily a data archive service. The PCS, as described in this paper, addresses data archiving, but takes a more architecturally grounded approach, eschewing scripts in favor of first-class architectural components and connectors to implement complete, end-to-end data processing pipelines. Furthermore, as complete science data processing pipelines are composed of a large number of complementary, interconnected services, a formal architectural underpinning helps to provide unity and cohesion among the constituent components.
+2.4 Standards
+Grid-based science data processing systems have matured sufficiently for common themes, lessons, and challenges to emerge among the many participants. As a result, there are several ongoing efforts to codify the shared knowledge and experience into formal standards. We discuss the Open Grid Forum and the Open Archives Initiative's Protocol for Metadata Harvesting.
+2.4.1 OGF
+The Open Grid Forum [2] is actively developing standards and specifications with the goal of spreading the adoption of grid-based software systems. The OGF is comprised of business, government, scientific, and academic organizations and focuses on interoperability as the key to expanding the utilization of grids. Through both advocacy and policy, the OGF represents an independent voice on the role of grids, and their potential to aid modern research.
+2.4.2 OAI
+The Open Archives Initiative [1] also promotes standards for interoperability and has developed, among others, the Protocol for Metadata Harvesting (OAI-PMH). The goal of the OAI-PMH is to improve application interoperability by enabling consistency in the way metadata (data about data) is exposed, accessed, and interpreted.
By providing a flexible, extensible standard interface to the rich array of application-specific metadata currently stored in non-uniform, distributed repositories, the OAI hopes to facilitate the broader accessibility and usability of distributed data resources.
+3 PCS Core Architecture
+In this section, we describe the PCS core components. The three PCS manager components, File Manager, Workflow Manager, and Resource Manager, are daemon-like web service components responsible for answering basic questions regarding file locations, metadata, task control and data flow, and resource availability, monitoring, and usage. The three PCS frameworks together implement the two critical higher-level services in data processing systems: (1) managing the ingestion and acquisition of remotely acquired datasets, handled via the Crawler Framework and Push Pull components; and (2) managing pipeline processing, product ingestion and data production, handled via the PCS Task Wrapper. We will describe each component in greater detail below. The overall PCS architecture is depicted in Fig. 1.
+3.1 File Manager
+The File Manager component is responsible for tracking, ingesting and moving file data and metadata between a client system and a server system. The File Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for file management. The critical objects managed by the File Manager include:
+Products - Collections of one or more files, and their associated Metadata.
+Metadata - A map of keys to multiple values of descriptive information about a Product.
+References - Pointers to a Product file's original location, and to its final resting location within the archive constructed by the File Manager.
+Product Type - Descriptive information about a Product that includes what type of file Uniform Resource Identifier (URI) [5] generation scheme to use, the root repository location for a particular Product, and a description of the Product.
+Element - A singular Metadata element, such as "Author" or "Creator". Elements may have additional metadata, in the form of the associated definition and even a corresponding Dublin Core [4] attribute.
+Versioner - A URI generation scheme for Product Types that defines the location within the archive (built by the File Manager) where a file belonging to a Product (that belongs to the associated Product Type) should be placed.
+Each Product contains one or more References, and one Metadata object. Each Product is a member of a single Product Type. The Metadata collected for each Product is defined by a mapping of Product Type to one or more Elements. Each Product Type has an associated Versioner.
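+A minimal sketch, in plain Java, of the object model as described above; the class and field shapes here are our reading of the text, not the actual CAS File Manager API:
+import java.util.List;
+import java.util.Map;
+
+interface Versioner {
+    // URI generation scheme: where in the archive a Product file should land
+    String generateArchiveUri(Product p);
+}
+
+class ProductType {
+    String name;
+    String rootRepositoryPath;
+    Versioner versioner;          // each Product Type has an associated Versioner
+    List<String> elementNames;    // Elements whose Metadata is collected per Product
+}
+
+class Product {
+    ProductType type;                    // member of exactly one Product Type
+    List<String> references;             // original and final (archive) locations
+    Map<String, List<String>> metadata;  // one Metadata object: key -> multiple values
+}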
+3.2 Workflow Manager
+The Workflow Manager component is responsible for the description, execution, and monitoring of Workflows, using a client and a server system. Workflows are typically considered to be sequences of tasks, joined together by control flow and data flow, that must execute in some ordered fashion. Workflows typically generate output data, perform routine management tasks (such as email, etc.), or describe a business's internal routine practices [14]. The Workflow Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for workflow management. The critical objects managed by the Workflow Manager include:
+Events - are what trigger Workflows to be executed. Events are named, and contain dynamic Metadata information, passed in by the user.
+Metadata - a dynamic set of properties, and values, provided to a WorkflowInstance via a user-triggered Event.
+Workflow - a description of both the control flow and data flow of a sequence of tasks (or stages) that must be executed in some order.
+Workflow Instance - an instance of a Workflow, typically containing additional runtime descriptive information, such as start time, end time, task wall clock time, etc. A WorkflowInstance also contains a shared Metadata context, passed in by the user who triggered the Workflow. This context can be read/written to by the underlying WorkflowTasks present in a Workflow.
+Workflow Tasks - descriptions of data flow, and an underlying process, or stage, that is part of a Workflow.
+Workflow Task Instances - the actual executing code, or process, that performs the work in the Workflow Task.
+Workflow Task Configuration - static configuration properties that configure a WorkflowTask.
+Workflow Conditions - any pre (or post) conditions on the execution of a WorkflowTask.
+Workflow Condition Instances - the actual executing code, or process, that performs the work in the Workflow Condition.
+Each Event initiates one or more Workflow Instances, providing a Metadata context (submitted by an external user). Each Workflow Instance is a run-time execution model of a Workflow. Each Workflow contains one or more Workflow Tasks. Each Workflow Task contains a single Workflow Task Configuration, and one or more Workflow Conditions. Each Workflow Task has a corresponding Workflow Task Instance (that it models), as does each Workflow Condition have a corresponding Workflow Condition Instance.
+3.3 Resource Manager
+The Resource Manager component is responsible for the execution, monitoring and tracking of jobs, storage and networking resources for an underlying set of hardware resources. The Resource Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for resource management. The critical objects managed by the Resource Manager include:
+Job - an abstract representation of an execution unit that stores information about an underlying program, or execution, that must be run on some hardware node, including information about the Job Input that the Job requires, information about the job load, and the queue that the job should be submitted to.
+Job Input - an abstract representation of the input that a Job requires.
+Job Spec - a complete specification of a Job, including its Job Input, and the Job definition itself.
+Job Instance - the physical code that performs the underlying job execution.
+Resource Node - an available execution node that a Job is sent to by the Resource Manager.
+Each Job Spec contains exactly one Job, and Job Input. Each Job Input is provided to a single Job. Each Job describes a single Job Instance. And finally, each Job is sent to exactly one Resource Node.
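+The cardinalities above can be made concrete with a short sketch; again, these Java types are illustrative assumptions rather than the managers' real API:
+import java.util.List;
+import java.util.Map;
+
+class WorkflowTask {
+    Map<String, String> staticConfig;   // Workflow Task Configuration
+    List<String> preConditions;         // Workflow Conditions guarding execution
+    String taskInstanceClass;           // code that performs the actual work
+}
+
+class Workflow {
+    List<WorkflowTask> tasks;           // ordered control flow and data flow
+}
+
+class WorkflowInstance {
+    Workflow model;                     // run-time execution model of a Workflow
+    Map<String, String> sharedContext;  // Metadata context from the triggering Event
+    long startTime, endTime;
+}
+
+class JobSpec {
+    String jobClass;       // Job: abstract execution unit
+    String jobInput;       // Job Input consumed by exactly one Job
+    String queue;          // queue the Job should be submitted to
+    String assignedNode;   // each Job is sent to exactly one Resource Node
+}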
+3.4 Crawler Framework
+The Crawler Framework was an effort to standardize the common ingestion activities that occur both in data acquisition and archival, as well as those that occur in pipeline processing. These types of activities regularly involve identification of files and directories to crawl (based on, e.g., mime type, regular expressions, or direct user input), satisfaction of ingestion preconditions (e.g., the current crawled file has not been previously ingested), followed by metadata extraction. After metadata extraction, crawled data follows a standard three-state lifecycle: (1) preIngestion, where, e.g., a file may be unzipped or pre-processed prior to ingestion; (2) postIngest success, indicating a successful ingestion has occurred and, e.g., the origin data file from the ingest area should be deleted; and (3) postIngest failure, indicating that ingestion was not successful and some corrective action, e.g., moving the failed file to a failure area for later examination, should occur.
+To date, we have identified three types of Product Crawlers, where each Crawler varies along the lines of customized precondition verification, crawling strategy, and need for metadata extraction. The StdProductCrawler assumes that a Metadata object has already been generated and included with a Product prior to ingestion, so no further work is required to generate Metadata from a Product – the Product is ready to be ingested. The MetExtractorProductCrawler is responsible for generating a Metadata object dynamically, as files are encountered during the crawling process. Finally, the AutoDetectCrawler uses content-type identification and regular expressions to identify Product Types dynamically, and then defaults to the behavior of the MetExtractorProductCrawler for Product Types identified via content detection. The critical objects managed by the Crawler Framework are:
+Crawler Action - is attached to one or more of the three phases; when a ProductCrawler enters a given phase, all the CrawlerActions attached to that phase are executed. The valid phases are: preIngest, postIngestSuccess and postIngestFailure.
+Precondition Comparator - is used by the MetExtractorProductCrawler and AutoDetectProductCrawler. Precondition Comparators are part of those ProductCrawlers' customized implementation of precondition verification, identifying appropriate times to suppress or allow metadata extraction and, ultimately, ingestion.
+Metadata Extractor - is run by the MetExtractorProductCrawler and the AutoDetectProductCrawler to generate Metadata for a Product file based on some business rules and logic.
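+As a sketch of the three-phase lifecycle, one might register actions per phase roughly as follows; the enum and interface names here are ours, not the framework's:
+import java.util.*;
+
+enum CrawlPhase { PRE_INGEST, POST_INGEST_SUCCESS, POST_INGEST_FAILURE }
+
+interface CrawlerAction {
+    void perform(java.io.File product);
+}
+
+class ProductCrawlerSketch {
+    private final Map<CrawlPhase, List<CrawlerAction>> actions = new EnumMap<>(CrawlPhase.class);
+
+    void addAction(CrawlPhase phase, CrawlerAction a) {
+        actions.computeIfAbsent(phase, k -> new ArrayList<>()).add(a);
+    }
+
+    // When the crawler enters a phase, every action attached to that phase runs.
+    void enterPhase(CrawlPhase phase, java.io.File product) {
+        actions.getOrDefault(phase, List.<CrawlerAction>of()).forEach(a -> a.perform(product));
+    }
+}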
+3.5 Push Pull Framework
+The Crawler Framework supports many generic ingestion services, including metadata extraction, crawling, and ingestion; however, one service that necessitated further work was the development of a protocol layer allowing a ProductCrawler to obtain content using protocol plugins that download content via implementations of remote protocols such as HTTP, FTP, the WinNT file system, HTTPS, etc.
+The Push Pull Framework is responsible for remote data acquisition and acceptance over modern web protocols, such as those mentioned above. The Push Pull Framework is flexible in that it provides the ability to plug in different Metadata Extractors, Data Protocols, Content Types, etc. The framework supports parallel file transfers and data downloads, email-based push data acceptance using the IMAP and SMTP protocols, and the ability to configure "Virtual" remote directories (based on Metadata such as Date/Time) from which files can be downloaded.
+The critical objects managed by the Push Pull Framework are:
+Retrieval Method - defines the manner in which files are retrieved from remote sites. It is given a configuration file, the Parser for the file, and a FileRetrievalSystem (which handles all the complexities of multi-threaded file downloading). There are currently two out-of-the-box RetrievalMethods: RemoteCrawler and ListRetriever. RemoteCrawler is a configurable remote-site directory crawler, filterable by file regular expressions. ListRetriever will download a given list of file URIs [5].
+Parser - parses a given configuration file into a VirtualFileStructure which is used to filter URIs to download.
+Protocol - handles file transfer and communication via some transfer protocol. Currently implemented Protocols include: sftp, ftp, http, imaps, file (localhost).
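+The protocol layer can be pictured as a small plugin registry keyed by URI scheme; this is our sketch of the idea, not the framework's actual interface:
+import java.net.URI;
+import java.nio.file.Path;
+import java.util.Map;
+
+interface Protocol {
+    void download(URI remote, Path localDest) throws Exception;
+}
+
+class ProtocolRegistry {
+    private final Map<String, Protocol> byScheme;
+
+    ProtocolRegistry(Map<String, Protocol> byScheme) { this.byScheme = byScheme; }
+
+    // Pick the transfer implementation (sftp, ftp, http, imaps, file, ...) by scheme.
+    Protocol forUri(URI remote) {
+        Protocol p = byScheme.get(remote.getScheme());
+        if (p == null) throw new IllegalArgumentException("no plugin for " + remote.getScheme());
+        return p;
+    }
+}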
+3.6 PCS Task Wrapper
+The PCS Task Wrapper framework is responsible for standardizing the setup, process initiation, execution and file management tasks surrounding the execution of NASA Product Generation Executives, or PGEs. PGEs codify a scientific algorithm, some step in the overall scientific process involved in a mission science workflow.
+The PCS Task Wrapper provides a stable operating environment to the underlying PGE during its execution lifecycle. If the PGE requires a file, or metadata regarding the file, the PCS Task Wrapper is responsible for delivering that information to the PGE in a manner that meets its requirements. If the PGE requires knowledge of upstream or downstream PGEs in a sequence of executions, that information is also made available, and finally, if information regarding disk space, or node information such as CPU availability, is required, the PCS Task Wrapper provides this information to the underlying PGE. After this information is collected, the PGE is executed, and its output Product file and Metadata generation is managed via the PCS Task Wrapper framework. The PCS Task Wrapper is responsible for marshalling output Products and Metadata back to the File Manager for use in downstream data processing and pedigree. In support of this, the PCS Task Wrapper leverages the Crawler Framework to ingest (during pipeline processing) the output Product files and Metadata produced by the PGE.
+As can be gleaned from the above discussion, the PCS Task Wrapper is really the unifying bridge between the execution of a step in the overall processing pipeline, and the available PCS component services and the information that they collectively manage.
+The critical objects managed by the PCS Task Wrapper are:
+PGETaskInstance - an abstract class which contains a generalized set of actions usually performed when running PGEs. Every variable and method is protected, thus allowing subclasses to easily modify just those generalized actions which need to be customized for different PGEs.
+Pge Config File Builder - builds a PgeConfig object and sets additional Metadata which codifies the information necessary for orchestrating a PGE through its lifecycle. The PCS Task Wrapper is based on a simple but powerful XML syntax which allows a scientist to simply fill out an XML file describing the necessary steps to execute a PGE.
+Config File Property Adder - builds the PgeConfig file object and sets custom PGE Metadata. This allows for a general PgeConfigBuilder with different ConfigFilePropertyAdders for setting PGE-specific fields in the PgeConfig object.
+Science Pge Config File Writer - passes PGE run information via configuration files. This object allows for any number of config files, in any desired format, to be generated describing PGE input, and for those files to be delivered to the PGE. The PCS Task Wrapper provides existing implementations, including a default XML Stylesheet Language (XSL) Transformation-based SciPgeConfigFileWriter.
+Pcs Met File Writer - aids in generating Metadata objects associated with PGE output products.
+[Figure 1. Component Interaction Within the PCS]
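+The wrapper's lifecycle reads as: gather inputs and context, run the PGE, then crawl and ingest its outputs. A compressed sketch under those assumptions (the method names are hypothetical, not the PGETaskInstance API):
+abstract class PgeTaskSketch {
+    protected abstract void stageInputsAndMetadata();   // deliver files/metadata the PGE needs
+    protected abstract int runPge() throws Exception;   // execute the underlying science code
+    protected abstract void ingestOutputs();            // crawl products back to the File Manager
+
+    public final void execute() throws Exception {
+        stageInputsAndMetadata();
+        if (runPge() != 0) {
+            throw new IllegalStateException("PGE exited abnormally");
+        }
+        ingestOutputs();  // output Products and Metadata feed downstream processing and pedigree
+    }
+}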
+4 Experience and Evaluation
+We have successfully applied the Process Control System (PCS) to existing NASA missions: the Orbiting Carbon Observatory (OCO) mission, and the NPP Sounder PEATE mission. Both systems involve tasks such as high-throughput job processing, terabyte-scale data management, and science computing facilities.
+4.1 Orbiting Carbon Observatory Mission
+On OCO, the mission is using the File Manager to ingest MODIS, CloudSat and other ancillary data products for use in the high performance Level 2 Science Algorithm. To date, OCO has already used the PCS software to process over four terabytes of Fourier Transform Spectrometer (FTS) data provided by ground-based instruments located around the world (e.g., Park Falls, Wisconsin, and Darwin, Australia), and has used the software to support Instrument Thermal Vacuum (TVAC) testing, processing 100% of all data taken by the OCO instrument during TVAC. Also, the PCS supports a science computing facility in which variants of scientific software can be exercised prior to inclusion in an operations pipeline.
+4.2 NPP Sounder PEATE Mission
+Specifically, NPP Sounder PEATE has already used the File Manager and Workflow Manager to ingest and process hundreds of gigabytes of IASI data (and is in preparation to accept CRIMS data). Also on PEATE, the PCS is currently being used to re-catalog over fifteen million existing science data products from the NASA AIRS mission's TLSCF. In addition, the Resource Manager will be used on NPP to support job processing across an eighty-node cluster.
+4.3 Further Applications
+In addition to the two aforementioned NASA missions, the PCS framework is being leveraged on reimbursable work for the National Cancer Institute (NCI)'s Early Detection Research Network (EDRN) [6]. JPL leads the informatics efforts on EDRN, and the PCS framework is being used in the collection, annotation and dissemination of raw scientific data supporting the early detection of cancer to scientists across the country.
+In the next year, PCS will also be used to support a new JPL-led NASA mission, the Soil Moisture Active Passive (SMAP) mission. The science computing facility designs on OCO and NPP have been used to create an algorithm testbed for SMAP scientists early in the design phase of the mission so that software integration risks can be mitigated during mission development [13].
+5 Conclusions and Future Work
+While the norm for earth science missions has been for each mission to develop its own one-off science data system from scratch, the continual decrease in mission funding combined with the exponential increase in mission complexity (data volume and processing throughput) over the last decade has made this approach passé and risky. It was clear that a new approach was needed.
+To this end, we have developed a standards-based software framework to provide common science data system services that yields the benefits of reuse while remaining adaptable to address the requirements that are unique to the customer. This reusable software is centered around the most basic science data system functions that support file and metadata management, workflow management, and resource management. Additional frameworks augment the core capabilities to provide automation for remote data acquisition, data ingestion and standard pipeline processing. This reusable software framework is the Process Control System (PCS) we have described in this paper.
+While the PCS has successfully supported the Orbiting Carbon Observatory (OCO) and NPP Sounder PEATE missions, upcoming missions in NASA's Decadal Survey present additional challenges. The JPL-led Soil Moisture Active Passive (SMAP) Mission (currently in formulation phase) will be using the PCS not only for operations, but also for the algorithm testbed and the science computing facility. Providing the operational infrastructure to the algorithm team early in the mission lifecycle will greatly reduce the cost and risk of development-to-operations for the most costly and risky aspect of most earth science data systems, the algorithms. However, this also means that easy integration of algorithms and dynamic workflow specification are our current focus for extending the PCS capabilities. Not far behind SMAP is another JPL-led mission, the Deformation, Ecosystem Structure and Dynamics of Ice (DESDynI) Mission. The challenges of DESDynI are requiring us to consider the deployment of PCS components to support a grid architecture, supporting distributed file management and processing capabilities supported by centralized access to a virtual science data system.
+Acknowledgements
+This effort was supported by the Jet Propulsion Laboratory, managed by the California Institute of Technology under a contract with the National Aeronautics and Space Administration.
+References
+[1] Open Archives Initiative, http://www.openarchives.org.
+[2] Open Grid Forum, http://www.ogf.org.
+[3] S4PA, http://daac.gsfc.nasa.gov/techlab/s4pa/index.shtml.
+[4] Dublin Core metadata element set, 1999.
+[5] T. Berners-Lee, R. Fielding, and L. Masinter. Uniform resource identifiers (URI): Generic syntax. Technical Report RFC 2396, 1998.
+[6] D. Crichton, S. Kelly, C. Mattmann, Q. Xiao, J. S. Hughes, J. Oh, M. Thornquist, D. Johnsey, S. Srivastava, L. Essermann, and W. Bigbee. A distributed information services architecture to support biomarker discovery in early detection of cancer. In e-Science, page 44, 2006.
+[7] E. Deelman, J. Blythe, Y. Gil, C. Kesselman, G. Mehta, S. Patil, M.-H. Su, K. Vahi, and M. Livny. Pegasus: Mapping scientific workflows onto the grid. 2004.
+[8] I. Foster. The anatomy of the grid: Enabling scalable virtual organizations. pages 6–7, 2001.
+[9] I. Foster. Globus toolkit version 4: Software for service-oriented systems. pages 2–13, 2005.
+[10] J. Frey, T. Tannenbaum, M. Livny, I. Foster, and S. Tuecke. Condor-G: A computation management agent for multi-institutional grids. Cluster Computing, 5(3):237–246, July 2002.
+[11] M. J. Litzkow, M. Livny, and M. W. Mutka. Condor – a hunter of idle workstations. pages 104–111, 1988.
+[12] C. Mattmann, D. J. Crichton, N. Medvidovic, and S. Hughes. A software architecture-based framework for highly distributed and data intensive scientific applications.
+In ICSE, pages 721–730, 2006.
+[13] D. Woollard, O.-I. Kwoun, T. Bicknell, S. Dunbar, and K. Leung. A science data system approach for the SMAP mission. In IEEE Radar, 2009.
+[14] D. Woollard, N. Medvidovic, Y. Gil, and C. A. Mattmann. Scientific software as workflows: From discovery to distribution. Software, IEEE, 25(4):37–43, 2008.
+[15] J. Yu and R. Buyya. A taxonomy of workflow management systems for grid computing, Apr 2005.
\ No newline at end of file
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-simEnts.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de3ecb351d1cf7a834e774d043c5e3cc5845f0eb
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions-simEnts.txt
@@ -0,0 +1,716 @@
+A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions
+DOI: 10.1109/SMC-IT.2009.27
+Chris A. Mattmann, Dana Freeborn, Dan Crichton, Brian Foster, Andrew Hart, David Woollard, Sean Hardman, Paul Ramirez, Sean Kelly, Albert Y. Chang, Charles E. Miller
+Jet Propulsion Laboratory
+California Institute of Technology
+Pasadena, CA 91109, USA
+mattmann@jpl.nasa.gov
+Abstract
+We describe a reusable architecture and implementation framework for managing science processing pipelines for mission ground data systems. Our system, dubbed "PCS", for Process Control System, improves upon an existing software component, the OODT Catalog and Archive (CAS), which has already supported the QuikSCAT, SeaWinds and AMT earth science missions. This paper focuses on PCS within the context of two current earth science missions: the Orbiting Carbon Observatory (OCO), and NPP Sounder PEATE projects.
+1 Introduction
+Data volume and computational needs for Earth science missions at NASA are growing by orders of magnitude. The low cost of disk storage space and the increasing power and pervasiveness of high performance computing have engendered an era in which previously unimaginable science questions can be answered in years rather than decades.
+These science questions range from the study of sea surface temperatures to observe maritime pollution, to measuring atmospheric chemical composition for weather forecasting, to obtaining a better understanding of the Earth's global carbon cycle and climate change as a whole.
+A significant portion of any space-based NASA earth science mission is a Ground Data System (GDS). The GDS is responsible for receiving raw spacecraft data as delivered from a ground station (a strategically placed data center on Earth with ample ground-to-space bandwidth and connectivity for receiving satellite data), and for processing the information through several focused series of steps with the goal of delivering the scientific value encoded in the data to interested scientists, both locally at an instrument team center, and then to universities, decision makers, and the broader science community. The processing that a GDS must perform ranges from mundane activities, including data (un-)marshalling (removal of special space "header" information) and subsetting, to more involved processes, including temporal and spatial positioning, calibration, and statistical analysis, to complex scientific assimilation, including prospective and retrospective physical modeling of a scene.
+Beginning with the Automated Multi-Mission Operations System (AMMOS) Multi-mission Ground Data System (MGDS) in the early 1990s, our work has focused on building reusable software components for GDS systems. As an example, the Central Data Base (CDB) Subsystem of the MGDS included database management software comprising metadata and file management, file transfer capabilities, user interfaces and data storage facilities to support multi-mission telemetry data streams for current and future planetary missions. This demanded that the CDB architecture adhere to the architectural principles of extensibility, scalability, and reusability. Because the CDB was and is part of a larger system that included controlled, centralized hardware, these architectural principles were satisfied for AMMOS by simply ensuring that the CDB was data and policy driven.
+Our ensuing work on the Alaska SAR Facility (ASF) and NASA Scatterometer (NSCAT) projects made clear two significant trends: 1) neither of these missions was part of the controlled, centralized system for which the CDB was developed, and 2) the data management requirements for these two missions were different from each other and from AMMOS. This meant that 1) hardware and platform choices could not be assumed and 2) additional capabilities not originally required for AMMOS had to be developed. In order to meet mission schedule and cost constraints, developers for each project independently employed a method we coined "rapid adaptation" of the original CDB software, which resulted in two very successful mission data systems with ultimately very few similarities or shared code.
+At the time the NSCAT follow-on mission (SeaWinds on ADEOS II) was ramping up, a technology task originally funded by the NASA Office of Space Science was focused on architecting and developing a common, standards-based software framework dubbed Object Oriented Data Technology (OODT) [12]. OODT provided "out of the box" core data management software services while remaining adaptable to address the (potentially evolving) requirements that are unique from mission to mission.
+Several authors of this paper, supporting SeaWinds and the OODT technology task, decided to collaborate to create a platform- and database-independent service for managing files and tasks. The result of this collaboration was the OODT Catalog and Archive Service component, which was architected to be reusable, reliable and scalable. The SeaWinds (on QuikSCAT and ADEOS II) and Advanced Communications Technology Satellite (ACTS) Mobile Terminal (AMT) projects benefited greatly from employing the CAS component to support their science data systems. QuikSCAT is in its 10th year of a planned 2-year mission and is continuing to function in a nearly lights-out mode. Hardware has been added to the system to support the unplanned data and processing volumes (reprocessing of 7 years of data completed in 6 months, simultaneous with daily operations) by simply changing the software configuration. No software engineers were required to extend the system.
+While the CAS component successfully supported SeaWinds and AMT, the JPL earth missions that followed, Orbiting Carbon Observatory (OCO) and NPP Sounder PEATE, needed to support far more complex processing (greatly increased data volumes and processing throughput) and various hardware and platform configurations. This forced us to rethink the CAS component implementation, which resulted in 1) the refactoring of the CAS component into two distinct components, the File Manager and the Workflow Manager, and 2) the development of a third component to provide a standard interface to various hardware and platform configurations, the Resource Manager.
+The refactoring of the CAS into the File Manager and the Workflow Manager components solved several issues. First, it decoupled the initiation of a workflow from the ingestion of a file. Therefore, while workflows can be initiated based on the ingestion of a particular file or file type, they can also be initiated based on other events such as a specific time of day, an operator request or a software request. Second, the refactoring provides developers and system designers the ability to utilize only the components they need. And third, the refactoring supports independent evolution of the components, and thus capabilities. The combination of these three refactored CAS components has come to be known as the Process Control System, or PCS.
+In addition to the File Manager, Workflow Manager and Resource Manager components that provide common reusable capabilities for file and metadata management, pipeline processing and job submission, we have also developed reusable interfaces to these components to provide additional commonly required capabilities for science data management systems. To support the automation of file ingestion, we have developed a configurable push-pull framework and crawler framework. To provide easy integration of science code in order to support all phases of algorithm development (testbed, operations and science computing facility), the PCS Task Wrapper has been developed.
+In this paper we will describe our core PCS components, their architecture, how they helped us solve problems on OCO and NPP Sounder PEATE, and how they are positioning us for the future of Earth science mission work. We believe such work will necessitate the same spirit of architectural reuse, understanding and mission-specific adaptation that led to the genesis of the modern PCS and that will ultimately lead to its future evolution.
We will argue in this paper that our PCS uniquely positions us in the state of the art in constructing large-scale, distributed, data-intensive GDS software for NASA Earth science missions.
+The rest of this paper is organized as follows. Section 2 provides further background and related efforts in the areas of grid computing, workflow systems and science data systems. Section 3 describes the core PCS architectural components in greater detail. Section 4 presents our experience leveraging the PCS on OCO and NPP Sounder PEATE. Section 5 rounds out the paper with conclusions and highlights our planned future work.
+2 Background and Related Work
+Since the development of the computational grid [8] as a means for the virtualization and sharing of processing and storage resources across organizational and geographic boundaries, many groups and organizations have recognized the power of the grid as an enabler of large-scale scientific research. In this section, we discuss ongoing software projects and research initiatives relevant to the PCS.
+2.1 Grid Systems
+The Globus toolkit [9], developed by The Globus Alliance, is a collection of open-source software tools for developing distributed computing systems and applications. The toolkit provides users with a suite of software components and libraries that can either be used individually or packaged together to implement the many aspects of a distributed, service-oriented infrastructure, including security, resource and data discovery, access, and management, and communication modules customized for a particular grid-based effort.
+2.2 Workflow Systems
+The past ten years have witnessed an explosion in the number of workflow languages and software systems developed to support scientific workflows. Yu and Buyya [15] attempted to taxonomize these scientific workflow systems, largely according to the underlying technologies with which they were built. In addition to this taxonomy, Woollard et al. presented a characterization of workflow systems based on the intended scientific use [14]. Specifically, the authors classified certain workflow systems as Production Systems, of which both the OCO and NPP Sounder PEATE ground data systems are examples.
+2.2.1 Condor
+Condor [11] is a grid-based job scheduling system developed at the University of Wisconsin–Madison which aims, among other things, to improve the effective usage of available distributed computing and storage resources by detecting and exploiting machine idle cycles. Condor provides mechanisms for job queuing, setting scheduling policies, and general resource management and monitoring. Condor insulates users from the details of the underlying infrastructure by transparently handling decisions about when and where jobs will be scheduled, monitoring their execution, and producing notifications of completion. While originally designed to operate in a workstation environment, a variant of Condor, Condor-G [10], leverages the Globus toolkit to provide a Condor implementation that is interoperable with Globus-based grids.
+2.2.2 Pegasus
+Pegasus [7] is similar to Condor in that it provides a layer of abstraction between the jobs to be processed and the hardware that they will eventually be processed on. Developed at the USC Information Sciences Institute, Pegasus is capable of dynamically assigning computational workflows with multiple processing steps to a large number of grid-based compute nodes based on resource availability.
+In addition to generating an initial workflow mapping, Pegasus offers
+the ability to transparently remap a workflow, increasing the
+reliability of the system in the event of failure in a small number of
+compute nodes.
+2.3 Science Data Processing Systems
+Science Data Processing Systems provide the base level of service
+needed to effectively manage the vast quantities of intermediate and
+final data products generated by large-scale, computationally intensive
+research tasks. While there are a large number of systems in operation,
+we focus our discussion on those which provide services distinctly
+similar to the PCS.
+2.3.1 S4PA
+The Simple, Scalable, Script-based Science Product Archive (S4PA) [3]
+is a storage architecture developed and deployed at NASA's Goddard
+Space Flight Center in support of the operation of the Goddard Earth
+Science Data and Information Services Center (GES DISC). As cost was a
+primary factor in the development of S4PA, the developers have taken
+pains to streamline the system. Hosting the primary copy of all data
+online reduced the need for costly physical media distribution, and
+utilizing the UNIX directory structure, in combination with
+metadata-encoded filenames, provides a simplified mechanism for archive
+and retrieval.
+As its name implies, the S4PA is primarily a data archive service. The
+PCS, as described in this paper, addresses data archiving, but takes a
+more architecturally grounded approach, eschewing scripts in favor of
+first-class architectural components and connectors to implement
+complete, end-to-end data processing pipelines. Furthermore, as
+complete science data processing pipelines are composed of a large
+number of complementary, interconnected services, a formal
+architectural underpinning helps to provide unity and cohesion among
+the constituent components.
+2.4 Standards
+Grid-based science data processing systems have matured sufficiently
+for common themes, lessons, and challenges to emerge among the many
+participants. As a result, there are several ongoing efforts to codify
+the shared knowledge and experience into formal standards. We discuss
+the Open Grid Forum and the Open Archives Initiative's Protocol for
+Metadata Harvesting.
+2.4.1 OGF
+The Open Grid Forum [2] is actively developing standards and
+specifications with the goal of spreading the adoption of grid-based
+software systems. The OGF comprises business, government, scientific,
+and academic organizations and focuses on interoperability as the key
+to expanding the utilization of grids. Through both advocacy and
+policy, the OGF represents an independent voice on the role of grids,
+and their potential to aid modern research.
+2.4.2 OAI
+The Open Archives Initiative [1] also promotes standards for
+interoperability and has developed, among others, the Protocol for
+Metadata Harvesting (OAI-PMH). The goal of the OAI-PMH is to improve
+application interoperability by enabling consistency in the way
+metadata (data about data) is exposed, accessed, and interpreted. By
+providing a flexible, extensible standard interface to the rich array
+of application-specific metadata currently stored in non-uniform,
+distributed repositories, the OAI hopes to facilitate the broader
+accessibility and usability of distributed data resources.
+3 PCS Core Architecture
+In this section, we describe the PCS core components.
+The three PCS manager components, File Manager, Workflow Manager, and
+Resource Manager, are daemon-like web service components responsible
+for answering basic questions regarding file locations, metadata, task
+control and data flow, and resource availability, monitoring, and
+usage.
+The three PCS frameworks together implement one of two critical
+higher-level services in data processing systems: (1) managing the
+ingestion and acquisition of remotely acquired datasets, handled via
+the Crawler Framework and Push Pull components; and (2) managing
+pipeline processing, product ingestion and data production, handled via
+the PCS Task Wrapper. We will describe each component in greater detail
+below. The overall PCS architecture is shown in Fig. 1.
+3.1 File Manager
+The File Manager component is responsible for tracking, ingesting and
+moving file data and metadata between a client system and a server
+system. The File Manager is an extensible software component that
+provides an XML-RPC external interface, and a fully tailorable
+Java-based API for file management. The critical objects managed by the
+File Manager include:
+Products - Collections of one or more files, and their associated
+Metadata.
+Metadata - A map of key to multiple values of descriptive information
+about a Product.
+References - Pointers to a Product file's original location, and to its
+final resting location within the archive constructed by the File
+Manager.
+Product Type - Descriptive information about a Product that includes
+what type of file Uniform Resource Identifier (URI) [5] generation
+scheme to use, the root repository location for a particular Product,
+and a description of the Product.
+Element - A singular Metadata element, such as "Author" or "Creator".
+Elements may have additional metadata, in the form of the associated
+definition and even a corresponding Dublin Core [4] attribute.
+Versioner - A URI generation scheme for Product Types that defines the
+location within the archive (built by the File Manager) where a file
+belonging to a Product (that belongs to the associated Product Type)
+should be placed.
+Each Product contains one or more References, and one Metadata object.
+Each Product is a member of a single Product Type. The Metadata
+collected for each Product is defined by a mapping of Product Type to
+one or more Elements. Each Product Type has an associated Versioner.
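+To make this object model and its cardinalities concrete, the
+following minimal Java sketch restates them in code. The class and
+field names are illustrative assumptions for exposition, not the File
+Manager's actual API.
+
+// Illustrative sketch of the File Manager object model; names are
+// assumptions, not the component's actual Java API.
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+class Reference {
+    URI origLocation;    // the file's original location
+    URI archiveLocation; // its final resting place in the archive
+}
+
+class Metadata {
+    // a map of key to multiple values, as described above
+    Map<String, List<String>> map = new HashMap<>();
+    void add(String key, String value) {
+        map.computeIfAbsent(key, k -> new ArrayList<>()).add(value);
+    }
+}
+
+interface Versioner {
+    // URI generation scheme: where a Product file lands in the archive
+    URI archiveLocation(Product product, Reference ref);
+}
+
+class ProductType {
+    String name;
+    String description;
+    URI rootRepositoryLocation;
+    Versioner versioner;       // each Product Type has one Versioner
+    List<String> elementNames; // Elements defining the expected Metadata
+}
+
+class Product {
+    ProductType type;                               // exactly one type
+    List<Reference> references = new ArrayList<>(); // one or more
+    Metadata metadata = new Metadata();             // exactly one
+}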
+3.2 Workflow Manager
+The Workflow Manager component is responsible for description,
+execution, and monitoring of Workflows, using a client and a server
+system. Workflows are typically considered to be sequences of tasks,
+joined together by control flow and data flow, that must execute in
+some ordered fashion. Workflows typically generate output data, perform
+routine management tasks (such as email, etc.), or describe a
+business's internal routine practices [14]. The Workflow Manager is an
+extensible software component that provides an XML-RPC external
+interface, and a fully tailorable Java-based API for workflow
+management. The critical objects managed by the Workflow Manager
+include:
+Events - are what trigger Workflows to be executed. Events are named,
+and contain dynamic Metadata information, passed in by the user.
+Metadata - a dynamic set of properties and values, provided to a
+WorkflowInstance via a user-triggered Event.
+Workflow - a description of both the control flow and data flow of a
+sequence of tasks (or stages) that must be executed in some order.
+Workflow Instance - an instance of a Workflow, typically containing
+additional runtime descriptive information, such as start time, end
+time, task wall clock time, etc. A WorkflowInstance also contains a
+shared Metadata context, passed in by the user who triggered the
+Workflow. This context can be read/written to by the underlying
+WorkflowTasks, present in a Workflow.
+Workflow Tasks - descriptions of data flow, and an underlying process,
+or stage, that is part of a Workflow.
+Workflow Task Instances - the actual executing code, or process, that
+performs the work in the Workflow Task.
+Workflow Task Configuration - static configuration properties that
+configure a WorkflowTask.
+Workflow Conditions - any pre (or post) conditions on the execution of
+a WorkflowTask.
+Workflow Condition Instances - the actual executing code, or process,
+that performs the work in the Workflow Condition.
+Each Event initiates one or more Workflow Instances, providing a
+Metadata context (submitted by an external user). Each Workflow
+Instance is a run-time execution model of a Workflow. Each Workflow
+contains one or more Workflow Tasks. Each Workflow Task contains a
+single Workflow Task Configuration, and one or more Workflow
+Conditions. Each Workflow Task has a corresponding Workflow Task
+Instance (that it models), as does each Workflow Condition have a
+corresponding Workflow Condition Instance.
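+The event-driven triggering described above can likewise be sketched
+in a few lines of Java. Again, all names here are illustrative
+assumptions rather than the Workflow Manager's actual API; a real
+manager would also execute the tasks and evaluate their conditions.
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+class WorkflowTask {
+    String name;
+    Map<String, String> configuration = new HashMap<>(); // static config
+    List<String> preConditions = new ArrayList<>();      // Conditions
+}
+
+class Workflow {
+    String name;
+    List<WorkflowTask> tasks = new ArrayList<>(); // ordered tasks
+}
+
+class WorkflowInstance {
+    Workflow model;                    // run-time model of a Workflow
+    long startTime;                    // runtime descriptive information
+    Map<String, String> sharedContext; // Metadata context shared by tasks
+}
+
+class WorkflowManager {
+    Map<String, List<Workflow>> eventToWorkflows = new HashMap<>();
+
+    // A named Event, carrying user-supplied Metadata, starts one
+    // WorkflowInstance per Workflow mapped to that Event.
+    List<WorkflowInstance> handleEvent(String event,
+                                       Map<String, String> metadata) {
+        List<WorkflowInstance> started = new ArrayList<>();
+        for (Workflow w : eventToWorkflows.getOrDefault(event,
+                List.of())) {
+            WorkflowInstance inst = new WorkflowInstance();
+            inst.model = w;
+            inst.sharedContext = new HashMap<>(metadata);
+            inst.startTime = System.currentTimeMillis();
+            started.add(inst); // task execution elided in this sketch
+        }
+        return started;
+    }
+}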
+3.3 Resource Manager
+The Resource Manager component is responsible for execution, monitoring
+and tracking of jobs, storage and networking resources for an
+underlying set of hardware resources. The Resource Manager is an
+extensible software component that provides an XML-RPC external
+interface, and a fully tailorable Java-based API for resource
+management. The critical objects managed by the Resource Manager
+include:
+Job - an abstract representation of an execution unit, that stores
+information about an underlying program, or execution that must be run
+on some hardware node, including information about the Job Input that
+the Job requires, information about the job load, and the queue that
+the job should be submitted to.
+Job Input - an abstract representation of the input that a Job
+requires.
+Job Spec - a complete specification of a Job, including its Job Input,
+and the Job definition itself.
+Job Instance - the physical code that performs the underlying job
+execution.
+Resource Node - an available execution node that a Job is sent to by
+the Resource Manager.
+Each Job Spec contains exactly one Job, and Job Input. Each Job Input
+is provided to a single Job. Each Job describes a single Job Instance.
+And finally, each Job is sent to exactly one Resource Node.
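+The following short Java sketch restates these objects and their
+one-to-one relationships, together with a naive node-selection policy.
+The types and the scheduling rule are illustrative assumptions, not
+the Resource Manager's actual implementation.
+
+import java.util.List;
+
+class JobInput {
+    List<String> fileUris; // abstract representation of what a Job consumes
+}
+
+class Job {
+    String name;
+    String queue; // the queue the job should be submitted to
+    int load;     // job load, used in scheduling decisions
+}
+
+class JobSpec {
+    Job job;        // exactly one Job...
+    JobInput input; // ...and exactly one Job Input
+}
+
+class ResourceNode {
+    String hostname;
+    int capacity; // remaining load this node can accept
+}
+
+class ResourceManager {
+    // Each Job is sent to exactly one Resource Node; here, the first
+    // node with enough spare capacity.
+    ResourceNode submit(JobSpec spec, List<ResourceNode> nodes) {
+        for (ResourceNode node : nodes) {
+            if (node.capacity >= spec.job.load) {
+                node.capacity -= spec.job.load;
+                return node; // a Job Instance would now run here
+            }
+        }
+        return null; // no node available; a real manager would queue
+    }
+}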
+3.4 Crawler Framework
+The Crawler Framework was an effort to standardize the common ingestion
+activities that occur both in data acquisition and archival, as well as
+those that occur in pipeline processing. These types of activities
+regularly involve identification of files and directories to crawl
+(based on, e.g., MIME type, regular expressions, or direct user input),
+satisfaction of ingestion pre-conditions (e.g., the current crawled
+file has not been previously ingested), followed by metadata
+extraction. After metadata extraction, crawled data follows a standard
+three-state lifecycle (sketched in code after the list below): (1)
+preIngestion, where, e.g., a file may be unzipped or pre-processed
+prior to ingestion; (2) postIngest success, indicating a successful
+ingestion has occurred and, e.g., the origin data file from the ingest
+area should be deleted; and (3) postIngest failure, indicating that
+ingestion was not successful and some corrective action, e.g., moving
+the failed file to a failure area for later examination, should occur.
+To date, we have identified three types of Product Crawlers, where each
+Crawler varies along the lines of customized precondition verification,
+crawling strategy, and need for metadata extraction. The
+StdProductCrawler assumes that a Metadata object has already been
+generated and included with a Product prior to ingestion, so no further
+work is required to generate Metadata from a Product – the Product is
+ready to be ingested. The MetExtractorProductCrawler is responsible for
+generating a Metadata object dynamically, as files are encountered
+during the crawling process. Finally, the AutoDetectProductCrawler uses
+content type identification and regular expressions to identify Product
+Types dynamically, and then defaults to the behavior of the
+MetExtractorProductCrawler for Product Types identified via content
+detection. The critical objects managed by the Crawler Framework are:
+Crawler Action - is attached to one or more of the three phases, and
+when a ProductCrawler enters a given phase, all the CrawlerActions
+attached to that phase are executed. The valid phases are: preIngest,
+postIngestSuccess and postIngestFailure.
+Precondition Comparator - is used by the MetExtractorProductCrawler and
+the AutoDetectProductCrawler. They are part of those ProductCrawlers'
+customized implementations of precondition verification, which identify
+appropriate times to stifle or allow metadata extraction, and
+ultimately ingestion, to occur.
+Metadata Extractor - is run by the MetExtractorProductCrawler and the
+AutoDetectProductCrawler to generate Metadata for a Product file based
+on some business rules and logic.
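+A minimal Java sketch of the three-phase lifecycle follows. The phase
+names come from the text above; the class shapes are assumptions made
+for illustration, not the framework's actual types.
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+
+enum CrawlPhase { PRE_INGEST, POST_INGEST_SUCCESS, POST_INGEST_FAILURE }
+
+interface CrawlerAction {
+    void perform(File productFile); // e.g., unzip before, clean up after
+}
+
+abstract class ProductCrawler {
+    Map<CrawlPhase, List<CrawlerAction>> actions =
+            new EnumMap<>(CrawlPhase.class);
+
+    // Subclasses differ in precondition verification and in how (or
+    // whether) they extract Metadata before ingestion.
+    abstract boolean passesPreconditions(File f);
+    abstract boolean extractMetadataAndIngest(File f);
+
+    void handleFile(File f) {
+        if (!passesPreconditions(f)) return; // e.g., already ingested
+        runActions(CrawlPhase.PRE_INGEST, f);
+        boolean ok = extractMetadataAndIngest(f);
+        runActions(ok ? CrawlPhase.POST_INGEST_SUCCESS
+                      : CrawlPhase.POST_INGEST_FAILURE, f);
+    }
+
+    private void runActions(CrawlPhase phase, File f) {
+        for (CrawlerAction action :
+                actions.getOrDefault(phase, new ArrayList<>())) {
+            action.perform(f);
+        }
+    }
+}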
+3.5 Push Pull Framework
+The Crawler Framework supports many generic ingestion services,
+including metadata extraction, crawling, and ingestion; however, one
+service that necessitated further work was the development of a
+protocol layer allowing a ProductCrawler to obtain content via protocol
+plugins that download content using implementations of remote protocols
+such as HTTP, FTP, WinNT file system, HTTPS, etc.
+The Push Pull Framework is responsible for remote data acquisition and
+acceptance over modern web protocols, such as those mentioned above.
+The Push Pull Framework is flexible in that it provides the ability to
+plug in different Metadata Extractors, Data Protocols, Content Types,
+etc. The framework supports parallel file transfers and data downloads,
+email-based push data acceptance using the IMAP and SMTP protocols, and
+the ability to configure "Virtual" remote directories (based on
+Metadata such as Date/Time) from which files can be downloaded.
+The critical objects managed by the Push Pull Framework are:
+Retrieval Method - defines the manner in which files are retrieved from
+remote sites. It is given a configuration file, the Parser for the
+file, and a FileRetrievalSystem (which handles all the complexities of
+multi-threaded file downloading). There are currently two
+out-of-the-box RetrievalMethods: RemoteCrawler and ListRetriever.
+RemoteCrawler is a configurable remote site directory and file regular
+expression filterable crawler. ListRetriever will download a given list
+of file URIs [5].
+Parser - parses a given configuration file into a VirtualFileStructure
+which is used to filter URIs to download.
+Protocol - handles file transfer and communication via some transfer
+protocol. Currently implemented Protocols include: sftp, ftp, http,
+imaps, file (localhost).
+3.6 PCS Task Wrapper
+The PCS Task Wrapper framework is responsible for standardizing the
+setup, process initiation, execution and file management tasks
+surrounding execution of NASA Product Generation Executives, or PGEs.
+PGEs codify a scientific algorithm, some step in the overall scientific
+process involved in a mission science workflow.
+The PCS Task Wrapper provides a stable operating environment to the
+underlying PGE during its execution lifecycle. If the PGE requires a
+file, or metadata regarding the file, the PCS Task Wrapper is
+responsible for delivering that information to the PGE in a manner that
+meets its requirements. If the PGE requires knowledge of upstream or
+downstream PGEs in a sequence of executions, that information is also
+made available, and finally, if information regarding disk space, or
+node information such as CPU availability, is required, the PCS Task
+Wrapper provides this information to the underlying PGE. After this
+information is collected, the PGE is executed and its output Product
+file and Metadata generation is managed via the PCS Task Wrapper
+framework. The PCS Task Wrapper is responsible for marshalling output
+Products and Metadata back to the File Manager for use in downstream
+data processing and pedigree. In support of this, the PCS Task Wrapper
+leverages the Crawler Framework to ingest (during pipeline processing)
+the output Product files and Metadata produced by the PGE.
+As can be gleaned from the above discussion, the PCS Task Wrapper is
+really the unifying bridge between the execution of a step in the
+overall processing pipeline, and the available PCS component services
+and the information that they collectively manage.
+The critical objects managed by the PCS Task Wrapper are:
+PGETaskInstance - an abstract class which contains a generalized set of
+actions usually performed when running PGEs. Every variable and method
+is protected, thus allowing subclasses to easily modify just those
+generalized actions which need to be customized for different PGEs.
+Pge Config File Builder - builds a PgeConfig object and sets additional
+Metadata which codifies the information necessary for orchestrating a
+PGE through its lifecycle. The PCS Task Wrapper is based on a simple
+but powerful XML syntax which allows a scientist to simply fill out an
+XML file to describe the necessary steps to execute a PGE.
+Config File Property Adder - builds the PgeConfig file object and sets
+custom PGE Metadata. This allows for a general PgeConfigBuilder with
+different ConfigFilePropertyAdders for setting PGE-specific fields in
+the PgeConfig object.
+Science Pge Config File Writer - passes PGE run information via
+configuration files. This object allows for any number of config files
+in any desired format to be generated describing PGE input, and those
+files to be delivered to the PGE. The PCS Task Wrapper provides
+existing implementations, including a default XML Stylesheet Language
+(XSL) Transformation based SciPgeConfigFileWriter.
+Pcs Met File Writer - aids in generating Metadata objects associated
+with PGE output products.
+Figure 1. Component Interaction Within the PCS
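+The wrapper lifecycle described above (stage inputs, run the PGE,
+marshal its outputs back to the File Manager) can be condensed into
+the following Java sketch. All names are illustrative assumptions; in
+particular, the ingestion step is reduced to a placeholder where the
+Crawler Framework would be invoked.
+
+import java.io.File;
+import java.util.List;
+
+class PgeConfig {
+    List<String> command; // the PGE executable and its arguments
+    File workingDir;      // where input and config files are staged
+    File outputDir;       // where output Products and Metadata appear
+}
+
+class PgeTaskWrapper {
+    int run(PgeConfig cfg) throws Exception {
+        // 1. Config files describing inputs, upstream/downstream PGEs,
+        //    and node information would be written here (elided).
+        // 2. Execute the PGE as an external process.
+        Process pge = new ProcessBuilder(cfg.command)
+                .directory(cfg.workingDir)
+                .inheritIO()
+                .start();
+        int exitCode = pge.waitFor();
+        // 3. Marshal output Products and Metadata back to the File
+        //    Manager; here, a stand-in crawl of the output directory.
+        if (exitCode == 0) {
+            crawlOutputs(cfg.outputDir);
+        }
+        return exitCode;
+    }
+
+    private void crawlOutputs(File dir) {
+        File[] products = dir.listFiles();
+        if (products == null) return;
+        for (File f : products) {
+            // a real wrapper would extract Metadata and ingest each
+            // Product via the File Manager's XML-RPC interface
+            System.out.println("would ingest: " + f.getName());
+        }
+    }
+}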
+4 Experience and Evaluation
+We have successfully applied the Process Control System (PCS) to
+existing NASA missions: the Orbiting Carbon Observatory (OCO) mission,
+and the NPP Sounder PEATE mission. Both systems involve tasks such as
+high-throughput job processing, terabyte-scale data management, and
+science computing facilities.
+4.1 Orbiting Carbon Observatory Mission
+On OCO, the mission is using the File Manager to ingest MODIS, CloudSat
+and other ancillary data products for use in the high performance
+Level 2 Science Algorithm. To date, OCO has already used the PCS
+software to process over four terabytes of Fourier Transform
+Spectrometer (FTS) data provided by ground-based instruments located
+around the world (e.g., Park Falls, Wisconsin, and Darwin, Australia),
+and has used the software to support Instrument Thermal Vacuum (TVAC)
+testing, processing 100% of all data taken by the OCO instrument during
+TVAC. Also, the PCS supports a science computing facility in which
+variants of scientific software can be exercised prior to inclusion in
+an operations pipeline.
+4.2 NPP Sounder PEATE Mission
+NPP Sounder PEATE has already used the File Manager and Workflow
+Manager to ingest and process hundreds of gigabytes of IASI data (and
+is in preparation to accept CrIMSS data). Also on PEATE, the PCS is
+currently being used to re-catalog over fifteen million existing
+science data products from the NASA AIRS mission's TLSCF. In addition,
+the Resource Manager will be used on NPP to support job processing
+across an eighty-node cluster.
+4.3 Further Applications
+In addition to the two aforementioned NASA missions, the PCS framework
+is being leveraged on reimbursable work for the National Cancer
+Institute (NCI)'s Early Detection Research Network (EDRN) [6]. JPL
+leads the informatics efforts on EDRN, and the PCS framework is being
+used in the collection, annotation and dissemination of raw scientific
+data supporting the early detection of cancer to scientists across the
+country.
+In the next year, PCS will also be used to support a new JPL-led NASA
+mission, the Soil Moisture Active Passive (SMAP) mission. The science
+computing facility designs on OCO and NPP have been used to create an
+algorithm testbed for SMAP scientists early in the design phase of the
+mission so that software integration risks can be mitigated during
+mission development [13].
+5 Conclusions and Future Work
+While the norm for earth science missions has been for each mission to
+develop its own one-off science data system from scratch, the continual
+decrease in mission funding combined with the exponential increase in
+mission complexity (data volume and processing throughput) over the
+last decade has made this approach passé and risky. It was clear that
+the need for a new approach was imminent.
+To this end, we have developed a standards-based software framework to
+provide common science data system services that yields the benefits of
+reuse while remaining adaptable to address the requirements that are
+unique to the customer. This reusable software is centered around the
+most basic science data system functions that support file and metadata
+management, workflow management, and resource management. Additional
+frameworks augment the core capabilities to provide automation for
+remote data acquisition, data ingestion and standard pipeline
+processing. This reusable software framework is the Process Control
+System (PCS) we have described in this paper.
+While the PCS has successfully supported the Orbiting Carbon
+Observatory (OCO) and NPP Sounder PEATE missions, upcoming missions in
+NASA's Decadal Survey present additional challenges. The JPL-led Soil
+Moisture Active Passive (SMAP) Mission (currently in formulation phase)
+will be using the PCS not only for operations, but also for the
+algorithm testbed and the science computing facility. Providing the
+operational infrastructure to the algorithm team early in the mission
+lifecycle will greatly reduce the cost and risk of
+development-to-operations for the most costly and risky aspect of most
+earth science data systems, the algorithms. However, this also means
+that easy integration of algorithms and dynamic workflow specification
+are our current focus for extending the PCS capabilities. Not far
+behind SMAP is another JPL-led mission, the Deformation, Ecosystem
+Structure and Dynamics of Ice (DESDynI) Mission. The challenges of
+DESDynI require us to consider the deployment of PCS components to
+support a grid architecture, providing distributed file management and
+processing capabilities backed by centralized access to a virtual
+science data system.
+Acknowledgements
+This effort was supported by the Jet Propulsion Laboratory, managed by
+the California Institute of Technology under a contract with the
+National Aeronautics and Space Administration.
+References
+[1] Open Archives Initiative, http://www.openarchives.org.
+[2] Open Grid Forum, http://www.ogf.org.
+[3] S4PA, http://daac.gsfc.nasa.gov/techlab/s4pa/index.shtml.
+[4] Dublin Core Metadata Element Set, 1999.
+[5] T. Berners-Lee, R. Fielding, and L. Masinter. Uniform Resource
+Identifiers (URI): Generic syntax. Technical Report RFC 2396, 1998.
+[6] D. Crichton, S. Kelly, C. Mattmann, Q. Xiao, J. S. Hughes, J. Oh,
+M. Thornquist, D. Johnsey, S. Srivastava, L. Essermann, and W. Bigbee.
+A distributed information services architecture to support biomarker
+discovery in early detection of cancer. In e-Science, page 44, 2006.
+[7] E. Deelman, J. Blythe, Y. Gil, C. Kesselman, G. Mehta, S. Patil,
+M.-H. Su, K. Vahi, and M. Livny. Pegasus: Mapping Scientific Workflows
+onto the Grid. 2004.
+[8] I. Foster. The anatomy of the grid: Enabling scalable virtual
+organizations. pages 6-7, 2001.
+[9] I. Foster. Globus toolkit version 4: Software for service-oriented
+systems. pages 2-13, 2005.
+[10] J. Frey, T. Tannenbaum, M. Livny, I. Foster, and S. Tuecke.
+Condor-G: A computation management agent for multi-institutional grids.
+Cluster Computing, 5(3):237-246, July 2002.
+[11] M. J. Litzkow, M. Livny, and M. W. Mutka. Condor - a hunter of
+idle workstations. pages 104-111, 1988.
+[12] C. Mattmann, D. J. Crichton, N. Medvidovic, and S. Hughes. A
+software architecture-based framework for highly distributed and data
+intensive scientific applications. In ICSE, pages 721-730, 2006.
+[13] D. Woollard, O. Kwoun, T. Bicknell, S. Dunbar, and K. Leung. A
+science data system approach for the SMAP mission. In IEEE Radar, 2009.
+[14] D. Woollard, N. Medvidovic, Y. Gil, and C. A. Mattmann. Scientific
+software as workflows: From discovery to distribution. Software, IEEE,
+25(4):37-43, 2008.
+[15] J. Yu and R. Buyya. A taxonomy of workflow management systems for
+grid computing, Apr 2005.
+View publication stats \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions.txt new file mode 100644 index 0000000000000000000000000000000000000000..de3ecb351d1cf7a834e774d043c5e3cc5845f0eb --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/A Reusable Process Control System Framework for the Orbiting Carbon Observatory and NPP Sounder PEATE missions.txt @@ -0,0 +1,716 @@ +See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/232619682 +A Reusable Process Control System Framework for the Orbiting Carbon +Observatory and NPP Sounder PEATE missions +Article · July 2009 +DOI: 10.1109/SMC-IT.2009.27 +CITATIONS +21 +READS +90 +11 authors, including: +Some of the authors of this publication are also working on these related projects: +Airborne Snow Observatory View project +The Planetary Data System PDS4 Information Model-Driven Architecture View project +Daniel J. Crichton +NASA +138 PUBLICATIONS 791 CITATIONS +SEE PROFILE +Sean Hardman +NASA +20 PUBLICATIONS 53 CITATIONS +SEE PROFILE +Paul Ramirez +NASA +39 PUBLICATIONS 408 CITATIONS +SEE PROFILE +Sean Colin-Patrick Kelly +NASA +43 PUBLICATIONS 213 CITATIONS +SEE PROFILE +All content following this page was uploaded by Paul Ramirez on 22 May 2014. +The user has requested enhancement of the downloaded file. +A Reusable Process Control System Framework for the Orbiting Carbon +Observatory and NPP Sounder PEATE missions +Chris A. Mattmann, Dana Freeborn, Dan Crichton, Brian Foster, +Andrew Hart, David Woollard, Sean Hardman, Paul Ramirez, +Sean Kelly, Albert Y. Chang, Charles E. Miller +Jet Propulsion Laboratory +California Institute of Technology +Pasadena, CA 91109, USA +mattmann@jpl.nasa.gov +Abstract +We describe a reusable architecture and implementation +framework for managing science processing pipelines for +mission ground data systems. Our system, dubbed “PCS”, +for Process Control System, improves upon an existing software +component, the OODT Catalog and Archive (CAS), +which has already supported the QuikSCAT, SeaWinds and +AMT earth science missions. This paper focuses on PCS +within the context of two current earth science missions: the +Orbiting Carbon Observatory (OCO), and NPP Sounder +PEATE projects. +1 Introduction +Data volume and computational needs for Earth science +missions at NASA are growing by orders of magnitude. The +low cost of disk storage space and the increasing power +and pervasiveness of high performance computing have engendered +an era in which previously unimaginable science +questions can be answered in years rather than decades. +These science questions range from the study of sea surface +temperatures to observe maritime pollution, to measuring +atmospheric chemical composition for weather forecasting, +to obtaining a better understanding of the Earth’s global carbon +cycle and climate change as a whole. +A significant portion of any space-based NASA earth +science mission is a Ground Data System (GDS). 
The GDS +is responsible for receiving raw spacecraft data as delivered +from a ground station1, and processing the information +through several focused series of steps with the goal of +1A strategically placed data center on Earth with ample ground-tospace +bandwidth and connectivity for receiving satellite data. +delivering the scientific value encoded in the data to interested +scientists, both locally at an instrument team center, +and then to universities, decision makers, and the broader +science community. The processing that a GDS must perform +ranges from mundane activities including data (un- +)marshalling (removal of special space “header” information), +and subsetting, to more involved processes including +temporal and spatial positioning, calibration, and statistical +analysis, to complex scientific assimilation including +prospective and retrospective physical modeling of a scene. +Beginning with Automated Multi-Mission Operations +System (AMMOS) Multi-mission Ground Data System +(MGDS) in the early 1990s, our work has focused on building +reusable software components for GDS systems. As +an example, the Central Data Base (CDB) Subsystem of the +MGDS included data base management software comprised +of metadata and file management, file transfer capabilities, +user interfaces and data storage facilities to support multimission +telemetry data streams for current and future planetary +missions. This demanded that the CDB architecture +adhere to the architectural principles of extensibility, scalability, +and reusability. Because the CDB was and is part of +a larger system that included controlled, centralized hardware, +these architectural principles of CDB were satisfied +for AMMOS by simply ensuring that the CDB was data and +policy driven. +Our ensuing work on the Alaska SAR Facility (ASF) and +NASA Scatterometer (NSCAT) projects, made clear two +significant trends: 1) neither of these missions were part +of the controlled, centralized system for which the CDB +was developed and 2) the data management requirements +for these two missions were different from each other and +AMMOS. This meant that 1) hardware and platform choices +could not be assumed and 2) additional capabilities not originally +required for AMMOS had to be developed. In order +to meet mission schedule and cost constraints, developers +for each project independently employed a method we +coined “rapid adaptation” of the original CDB software that +resulted in two very successful mission data systems with +ultimately very few similarities or shared code. +At the time the NSCAT follow-on mission (SeaWinds on +ADEOS II) was ramping up, a technology task originally +funded by the NASA Office of Space Science was focused +on architecting and developing a common, standards-based +software framework dubbed Object Oriented Data Technology +(OODT) [12]. OODT provided “out of the box” core +data management software services while remaining adaptable +to address the (potentially evolving) requirements that +are unique from mission to mission. +Several authors of this paper supporting SeaWinds and +the OODT technology task decided to collaborate to create +a platform- and database-independent service for managing +files and tasks. The result of this collaboration was +the OODT Catalog and Archive Service component that +was architected to be reusable, reliable and scalable. 
The +SeaWinds (on QuikSCAT and ADEOS II) and Advanced +Communications Technology Satellite (ACTS) Mobile Terminal +(AMT) projects benefited greatly from employing +the CAS component to support their science data systems. +QuikSCAT is in its 10th year of a planned 2-year mission +and is continuing to function in a nearly lights out mode. +Hardware has been added to the system to support the unplanned +data and processing volumes (reprocessing of 7 +years of data completed in 6 months, simultaneous with +daily operations) by simply changing the software configuration. +No software engineers were required to extend the +system. +While the CAS component successfully supported Sea- +Winds and AMT, the following JPL earth missions, Orbiting +Carbon Observatory (OCO) and NPP Sounder PEATE, +needed to support far more complex processing (greatly increased +data volumes and processing throughput) and various +hardware and platform configurations. This forced us to +rethink the CAS component implementation which resulted +in 1) the refactoring of the CAS component into two distinct +components, the File Manager and the Workflow Manager +and 2) the development of a third component to provide a +standard interface to various hardware and platform configurations, +the Resource Manager. +The refactoring of the CAS into the File Manager and the +Workflow Manager components solved several issues. First, +it decoupled the initiation of a workflow from the ingestion +of a file. Therefore, while workflows can be initiated based +on the ingestion of a particular file or file type, they can also +be initiated based on other events such as a specific time of +day, an operator request or a software request. Second, the +refactoring provides developers and system designers the +ability to utilize only the components they need. And third, +the refactoring supports independent evolution of the components, +and thus capabilities. The combination of these +three refactored CAS components have come to be known +as the Process Control System, or PCS. +In addition to the File Manager, Workflow Manager +and Resource Manager components that provide common +reusable capabilities for file and metadata management, +pipeline processing and job submission, we have also developed +reusable interfaces to these components to provide +additional commonly required capabilities for science data +management systems. To support the automation of file ingestion, +we have developed a configurable push-pull framework +and crawler framework. To provide easy integration +of science code in order to support all phases of algorithm +development (testbed, operations and science computing facility), +the PCS Task Wrapper has been developed. +In this paper we will describe our core PCS components, +their architecture, how they helped us solve problems on +OCO and NPP Sounder PEATE, and how they are positioning +us for the future of Earth science mission work. We believe +such work will necessitate the same spirt of architectural +reuse, understanding and mission specific adaptation +that led to the genesis of the modern PCS and that will ultimately +lead to its future evolution. We will argue in this paper +that our PCS uniquely positions us in the state of the art +in constructing large-scale, distributed, data-intensive GDS +software for NASA Earth science missions. +The rest of this paper is organized as follows. 
Section 2 +provides further background and related efforts in the areas +of grid computing, workflow systems and science data systems. +Section 3 describes the core PCS architectural components +in greater detail. Section 4 presents our experience +leveraging the PCS on OCO and NPP Sounder PEATE. Section +5 rounds out the paper with conclusions and highlights +our planned future work. +2 Background and Related Work +Since the development of the computational grid [8] as +a means for the virtualization and sharing of processing +and storage resources across organizational and geographic +boundaries, many groups and organizations have recognized +the power of the grid as an enabler of large-scale scientific +research. In this paper, we discuss ongoing software +projects and research initiatives relevant to the PCS. +2.1 Grid Systems +The Globus toolkit [9], developed by The Globus Alliance, +is a collection of open-source software tools for developing +distributed computing systems and applications. +The toolkit provides users with a suite of software components +and libraries that can either be used individually or +packaged together to implement the many aspects of a distributed, +service-oriented infrastructure including security, +resource and data discovery, access, and management, and +communication modules customized for a particular gridbased +effort. +2.2 Workflow Systems +The past ten years have witnessed an explosion in the +number of workflow languages and software systems developed +to support scientific workflows. Yu and Buyya [15] +attempted to taxonomize these scientific workflow systems, +largely according the underlying technologies with which +they were built. In addition to this taxonomy, Woollard, et. +al., presented a characterization of workflow systems based +the intended scientific use [14]. Specifically, the authors +classified certain workflow systems as Production Systems, +of which both the OCO and NPP Sounder PEATE ground +data systems are examples. +2.2.1 Condor +Condor [11] is a grid-based job scheduling system developed +at the University of Wisconsin Madison which aims, +among other things, to improve the effective usage of available +distributed computing and storage resources by detecting +and exploiting machine idle cycles. Condor provides +mechanisms for job queuing, setting scheduling policies, +and general resource management and monitoring. Condor +insulates users from the particulars of the details of the underlying +infrastructure by transparently handling decisions +about when and where jobs will be scheduled, monitoring +their execution, and producing notifications of completion. +While originally designed to operate in a workstation environment, +a variant of Condor, Condor-G [10], leverages the +Globus toolkit to provide a Condor implementation that is +interoperable with Globus-based grids. +2.2.2 Pegasus +Pegasus [7] is similar to Condor in that it provides a layer of +abstraction between the jobs to be processed and the hardware +that they will eventually be processed on. Developed +at the USC Information Science Pegasus is capable of dynamically +assigning computational workflows with multiple +processing steps to a large number of grid-based compute +nodes based on resource availability. In addition to generating +an initial workflow mapping, Pegasus offers the ability +to transparently remap a workflow, increasing the reliability +of the system in the event of failure in a small number of +compute nodes. 
+2.3 Science Data Processing Systems +Science Data Processing Systems provide the base level +of service needed to effectively manage the vast quantities +of intermediate and final data products generated by largescale, +computationally intensive research tasks. While there +are a large number of systems in operation, we focus our +discussion on those which provide services distinctly similar +to the PCS. +2.3.1 S4PA +The Simple, Scalable, Script-based Science Product +Archive (S4PA) [3], is a storage architecture developed and +deployed at NASAs Goddard Space Flight Center in support +of the operation of the Goddard Earth Science Data +and Information Services Center (GES DISC). As cost was +a primary factor in the development of S4PA, the developers +have taken pains to streamline the system. Hosting the +primary copy of all data online reduced the need for costly +physical media distribution, and utilizing the UNIX directory +structure, in combination with metadata-encoded filenames, +provides a simplified mechanism for archive and retrieval. +As its name implies, the S4PA is primarily a data archive +service. The PCS, as described in this paper, addresses data +archiving, but takes a more architecturally grounded approach, +eschewing scripts in favor of first-class architectural +components and connectors to implement complete, endto- +end data processing pipelines. Furthermore, as complete +science data processing pipelines are composed of a large +number of complimentary, interconnected services, a formal +architectural underpinning helps to provide unity and +cohesion among the constituent components. +2.4 Standards +Grid-based science data processing systems have matured +sufficiently for common themes, lessons, and challenges +to emerge among the many participants. As a result, +there are several ongoing efforts to codify the shared knowledge +and experience into formal standards. We discuss the +Open Grid Framework and the Open Archives Initiatives +Protocol for Metadata Harvesting. +2.4.1 OGF +The Open Grid Forum [2] is actively developing standards +and specifications with the goal of spreading the adoption +of grid-based software systems. The OGF is comprised +of business, government, scientific, and academic organizations +and focuses on interoperability as the key to expanding +the utilization of grids. Through both advocacy and policy, +the OGF represents an independent voice on the role of +grids, and their potential to aid modern research. +2.4.2 OAI +The Open Archives Initiative [1] also promotes standards +for interoperability and has developed, among others, the +Protocol for Metadata Harvesting (OMI-PMH). The goal +of the OMI-PMH is to improve application interoperability +by enabling consistency in the way metadata (data about +data) is exposed, accessed, and interpreted. By providing +a flexible, extensible standard interface to the rich array +of application-specific metadata currently stored in nonuniform, +distributed repositories, the OAI hopes to facilitate +the broader accessibility and usability of distributed data resources. +3 PCS Core Architecture +In this section, we describe the PCS core components. +The three PCS manager components, File Manager, Workflow +Manager, and Resource Manager, are daemon-like web +service components responsible for answering basic questions +regarding file locations, metadata, task control and +data flow, and resource availability, monitoring, and usage. 
+The three PCS frameworks together implement one of +two critical higher level services in data processing systems: +(1) managing the ingestion and acquisition of remotely acquired +datasets, handled via the Crawler Framework and +Push Pull components ; and (2) managing pipeline processing, +product ingestion and data production, handled via the +PCS Task Wrapper. We will describe each component in +greater detail below. The overall PCS architecture described +in this architecture is given in Fig. 1. +3.1 File Manager +The File Manager component is responsible for tracking, +ingesting and moving file data and metadata between a +client system and a server system. The File Manager is an +extensible software component that provides an XML-RPC +external interface, and a fully tailorable Java-based API for +file management. The critical objects managed by the File +Manager include: +Products - Collections of one or more files, and their associated +Metadata. +Metadata - A map of key to multiple values of descriptive +information about a Product. +References - Pointers to a Product file’s original location, +and to its final resting location within the archive constructed +by the File Manager. +Product Type - Descriptive information about a Product +that includes what type of file Uniform Resource Identifier +(URI) [5] generation scheme to use, the root +repository location for a particular Product, and a description +of the Product. +Element - A singular Metadata element, such as “Author”, +or “Creator”. Elements may have additional metadata, +in the form of the associated definition and even a corresponding +Dublin Core [4] attribute. +Versioner - A URI generation scheme for Product Types +that defines the location within the archive (built by +the File Manager) where a file belonging to a Product +(that belongs to the associated Product Type) should be +placed. +Each Product contains one or more References, and one +Metadata object. Each Product is a member of a single +Product Type. The Metadata collected for each Product is +defined by a mapping of Product Type to one or more Elements. +Each Product Type has an associated Versioner. +3.2 Workflow Manager +TheWorkflow Manager component is responsible for description, +execution, and monitoring of Workflows, using a +client, and a server system. Workflows are typically considered +to be sequences of tasks, joined together by control +flow, and data flow, that must execute in some ordered +fashion. Workflows typically generate output data, perform +routine management tasks (such as email, etc.), or describe +a business’s internal routine practices [14]. The Workflow +Manager is an extensible software component that provides +an XML-RPC external interface, and a fully tailorable Javabased +API for workflow management. The critical objects +managed by the Workflow Manager include: +Events - are what triggerWorkflows to be executed. Events +are named, and contain dynamic Metadata information, +passed in by the user. +Metadata - a dynamic set of properties, and values, provided +to a WorkflowInstance via a user-triggered +Event. +Workflow - a description of both the control flow, and data +flow of a sequence of tasks (or stages that must be executed +in some order. +Workflow Instance - an instance of a Workflow, typically +containing additional runtime descriptive information, +such as start time, end time, task wall clock time, etc. +A WorkflowInstance also contains a shared Metadata +context, passed in by the user who triggered theWorkflow. 
+This context can be read/written to by the underlying +WorkflowTasks, present in a Workflow. +Workflow Tasks - descriptions of data flow, and an underlying +process, or stage, that is part of a Workflow. +Workflow Task Instances - the actual executing code, or +process, that performs the work in the Workflow Task. +Workflow Task Configuration - static configuration +properties, that configure a WorkflowTask. +Workflow Conditions - any pre (or post) conditions on the +execution of a WorkflowTask. +Workflow Condition Instances - the actual executing +code, or process, that performs the work in the Workflow +Condition. +Each Event initiates one or more Workflow Instances, +providing a Metadata context (submitted by an external +user). Each Workflow Instance is a run-time execution +model of a Workflow. Each Workflow contains one or +more Workflow Tasks. Each Workflow Task contains a single +Workflow Task Configuration, and one or more Workflow +Conditions. Each Workflow Task has a corresponding +Workflow Task Instance (that it models), as does each +Workflow Condition have a corresponding Workflow Condition +Instance. +3.3 Resource Manager +The Resource Manager component is responsible for excecution, +monitoring and traacking of jobs, storage and networking +resources for an underlying set of hardware resources. +The Resource Manager is an extensible software +component that provides an XML-RPC external interface, +and a fully tailorable Java-based API for resource management. +The critical objects managed by the Resource Manager +include: +Job - an abstract representation of an execution unit, that +stores information about an underlying program, or execution +that must be run on some hardware node ,including +information about the Job Input that the Job +requires, information about the job load, and the queue +that the job should be submitted to. +Job Input - an abstrct representation of the input that a Job +requires. +Job Spec - a complete specification of a Job, including its +Job Input, and the Job definition itself. +Job Instance - the physical code that performs the underlying +job execution. +Resource Node - an available execution node that a Job is +sent to by the Resource Manager. +Each Job Spec contains exactly one Job, and Job Input. +Each Job Input is provided to a single Job. Each Job describes +a single Job Instance. And finally, each Job is sent +to exactly one Resource Node. +3.4 Crawler Framework +The Crawler Framework was an effort to standardize the +common ingestion activities that occur both in data acquisition +and archival, as well as those that occur in pipeline +processing. These types of activities regularly involve identification +of files and directories to crawl (based on e.g., +mime type, regular expressions, or direct user input), satisfaction +of ingestion pre-conditions (e.g., the current crawled +file has not been previously ingested), followed by metadata +extraction. After metadata extraction, crawled data follows +a standard three state lifecycle: (1) preIngestion - where +e.g., a file may be unzipped or pre-processed prior to ingestion; +(2) postIngest success, indicating a successful ingestion +has occurred and e.g., the origin data file from the +ingest area should be deleted; and (3) postIngest failure, indicating +that ingestion was not successful and some corrective +action, e.g,. moving the failed file to a failure area for +later examination, should occur. 
+To date, we have identified three types of Product +Crawlers, where each Crawler varies along the lines of customized +precondition verification, crawilng strategy, and +need for metadata extraction. The StdProductCrawler assumes +that a Metadata object has already been generated +and included with a Product prior to ingestion, so no further +work is required to generate Metadata from a Product – +the Product is ready to be ingested. The MetExtractorProductCrawler +is responsible for generating a Metadata object +dynamically, as files are encountered during the crawling +process. Finally, the AutoDetectCrawler uses a content +type identification and regular-expressions to identify Product +Types dynamically, and then defaults to the behavior of +the MetExtractorProductCrawler for Product Types identified +via content detection. The critical objects managed by +the Crawler Framework are: +Crawler Action - is attached to one or more of the three +phases, and when a ProductCrawler enters a given +phases, all the CrawlerActions attached to that phase +are executed. The valid phases are: preIngest, +postIngestSuccess and postIngestFailure. +Precondition Comparator - is used by MetExtractorProductCrawler +and AutoDetectProductCrawler. They are +part of those ProductCrawlers customized implementation +of precondition verification that identify appropriate +times to stifle or allow metadata extractor and +ultimately ingestion, to occur. +Metadata Extractor - is run by the MetExtractorProductCrawler +and the AutoDetectProductCrawler to +generate Metadata for a Product file based on some +business rules and logic. +3.5 Push Pull Framework +The Crawler Framework supports many generic ingestion +services, including metadata extraction, crawling, and +ingestion, however, one service that necessitated further +work was the development of a protocol layer allowing +a ProductCrawler to obtain content using protocol plugins +that download content using implementations of remote +protocols such as HTTP, FTP, WinNT file system, HTTPS, +etc. +The Push Pull Framework is responsible for remote data +acquisition and acceptance over modern web protocols, +such as those mentioned above. The Push Pull Framework +is flexible in that it provides the ability to plug in different +Metadata Extractors, Data Protocols, Content Types, +etc. The framework supports parallel file transfers and data +downloads, email-based push data acceptance using IMAP, +SMTP protocols, and the ability to configure “Virtual” remote +directories (based on Metadata such as Date/Time) +from which files can be downloaded. +The critical objects managed by the Push Pull Framework +are: +Retrieval Method - defines the manner in which files are +retrieved from remote sites. It is given a configuration +file, a the Parser for the file, and a FileRetrievalSystem +(which handles all the complexities of multi-threaded +file downloading). There are currently two out-of-thebox +RetrievalMethods: RemoteCrawler and ListRetriever. +RemoteCrawler is a configurable remote site +directory and file regular expression filterable crawler. +ListRetriever will download a given list of file URIs +[5]. +Parser - parses a given configuration file into a Virtual- +FileStructure which is use to filter URIs to download. +Protocol - handles file transfer and communication via +some transfer protocol. Currently implemented Protocols +include: sftp, ftp, http, imaps, file (localhost). 
+3.6 PCS Task Wrapper +The PCS Task Wrapper framework is responsible for +standardizing the setup, process initiation, execution and +file management tasks surrounding execution of NASA +Product Generation Executives, or PGEs. PGEs codify a +scientific algorithm, some step in the overall scientific process +involved in a mission science workflow. +The PCS Task Wrapper provides a stable operating environment +to the underlying PGE during its execution lifecycle. +If the PGE requires a file, or metadata regarding the +file, the PCS Task Wrapper is responsible for delivering that +information to the PGE in a manner that meets its requirements. +If the PGE requires knowledge of upstream or downstream +PGEs in a sequence of executions, that information +is also made available, and finally if information regarding +disk space, node information such as CPU availability, etc., +is required the PCS Task Wrapper provides this information +to the underlying PGE. After this information is collected, +the PGE is executed and its output Product file and +Metadata generation is managed via the PCS Task Wrapper +framework. The PCS Task Wrapper is responsible for +marshalling output Products and Metadata back to the File +Manager for use in downstream data processing and pedigree. +In support of this, the PCS Task Wrapper leverages +the Crawler Framework to ingest (during pipeline processing) +the output Product files and Metadata produced by the +PGE. +As can be gleaned from the above discussion, the PGE +Task Wrapper is really the unifying bridge between the execution +of a step in the overall processing pipeline, and the +available PCS component services and the information that +they collectively manage. +The critical objects managed by the PCS Task Wrapper +are: +PGETaskInstance - an abstract class which contains a +generalized set of actions usually performed when running +PGEs. Every variable and method is protected, +thus allowing subclasses to easily modify just those +generalized actions which need to be customized for +different PGE. +Pge Config File Builder - builds a PgeConfig object and +set additional Metadata which codifies the information +necessary for orchestrating a PGE through its lifecycle. +The PCS Task Wrapper is based on a simple but +powerful XML syntax which allows a scientist to simply +fill out an xml file to describe the necessary steps +to execute a PGE. +Config File Property Adder - builds the Pge Config file +object and sets custom PGE Metadata. This allows +for a general PgeConfigBuilder with different Config- +FilePropertyAdders for setting PGE specific fields in +the PgeConfig object. +Science Pge Config File Writer - passes a PGE run information +via configuration files. This object allows +for any number of config files in any desired format +to be generated describing PGE input and those files +to be delivered to the PGE. The PCS Task Wrapper +provides existing implementations, including a deFigure +1. Component Interaction Within the PCS +fault XML Stylesheet Language (XSL) Transformation +based SciPgeConfigFileWriter. +Pcs Met File Writer - aids in generating Metadata objects +associated with PGE output products. +4 Experience and Evaluation +We have successfully applied the Process Control System +(PCS) to existing NASA missions: the Orbiting Carbon +Observatory (OCO) mission, and the NPP Sounder PEATE +mission. Both systems involve tasks such as high throughput +job processing, terabyte-scale data management, and +science computing facilities. 
+4.1 Orbiting Carbon Observatory Mission +On OCO, the mission is using the File Manager to ingest +MODIS, CloudSat and other ancillary data products +for use in the high performance Level 2 Science Algorithm. +To date, OCO has already used the PCS software to process +over four terabytes of Fourier Transform Spectrometer +(FTS) data provided by ground-based instruments located +around the country (e.g., Park falls, Montana, and Darwin, +Australia), and has used the software to support Instrument +Thermal Vacuum (TVAC) testing, processing 100% of all +data taken by the OCO instrument during TVAC. Also, the +PCS supports a science computing facility in which variants +of scientific software can be excursive prior to inclusion in +an operations Pipeline. +4.2 NPP Sounder PEATE Mission +Specifically NPP Sounder PEATE has already used the +File Manager and Workflow Manager to ingest and process +hundreds of gigabytes of IASI data (and is in preparation to +accept CRIMS data). Also on PEATE, the PCS is currently +being used to re-catalog over fifteen million existing science +data products from the NASA AIRS missions TLSCF. +In addition, the Resource Manager will be used on NPP to +support job processing across an eighty-node cluster. +4.3 Further Applications +In addition to the two aforementioned NASA missions, +the PCS framework is being leveraged on reimbursable +work for the National Cancer Institute (NCI)’s Early Detection +Research Network (EDRN) [6]. JPL leads the informatics +efforts on EDRN, and the PCS framework is being +used in the collection, annotation and dissemination of raw +scientific data supporting the early detection of cancer to +scientists across the country. +In the next year, PCS will also be used to support a new +JPL-led NASA mission, the Soil Moisture Active Passive +(SMAP) mission. The science computing facility designs +on OCO and NPP have been used to create an algorithm +testbed for SMAP scientists early in the design phase of the +mission so that software integration risks can be mitigated +during mission development [13]. +5 Conclusions and Future Work +While the norm for earth science missions has been for +each mission to develop their own one-off science data system +from scratch, the continual decrease in mission funding +combined with the exponential increase in mission complexity +(data volume and processing throughput) over the +last decade has made this approach pass´e and risky. It was +clear that the need for a new approach was eminent. +To this end, we have developed a standards-based software +framework to provide common science data system +services that yields the benefits of reuse while remaining +adaptable to address the requirements that are unique to the +customer. This reusable software is centered around the +most basic science data system functions that support file +and metadata management, workflow management, and resource +management. Additional frameworks augment the +core capabilities to provide automation for remote data acquisition, +data ingestion and standard pipeline processing. +This reusable software framework is the Process Control +System (PCS) we have described in this paper. +While the PCS has successfully supported the Orbiting +Carbon Observatory (OCO) and NPP Sounder PEATE +missions, upcoming missions in NASAs Decadal Survey +present additional challenges. 
The JPL-led Soil Moisture Active Passive (SMAP) mission (currently in formulation phase) will be using the PCS not only for operations, but also for the algorithm testbed and the science computing facility. Providing the operational infrastructure to the algorithm team early in the mission lifecycle will greatly reduce the cost and risk of development-to-operations for the most costly and risky aspect of most earth science data systems: the algorithms. However, this also means that easy integration of algorithms and dynamic workflow specification are our current focus for extending the PCS capabilities.
+Not far behind SMAP is another JPL-led mission, the Deformation, Ecosystem Structure and Dynamics of Ice (DESDynI) mission. The challenges of DESDynI require us to consider the deployment of PCS components to support a grid architecture, with distributed file management and processing capabilities backed by centralized access to a virtual science data system.
+Acknowledgements
+This effort was supported by the Jet Propulsion Laboratory, managed by the California Institute of Technology under a contract with the National Aeronautics and Space Administration.
+References
+[1] Open Archives Initiative, http://www.openarchives.org.
+[2] Open Grid Forum, http://www.ogf.org.
+[3] S4PA, http://daac.gsfc.nasa.gov/techlab/s4pa/index.shtml.
+[4] Dublin Core Metadata Element Set, 1999.
+[5] T. Berners-Lee, R. Fielding, and L. Masinter. Uniform Resource Identifiers (URI): Generic Syntax. Technical Report RFC 2396, 1998.
+[6] D. Crichton, S. Kelly, C. Mattmann, Q. Xiao, J. S. Hughes, J. Oh, M. Thornquist, D. Johnsey, S. Srivastava, L. Essermann, and W. Bigbee. A distributed information services architecture to support biomarker discovery in early detection of cancer. In e-Science, page 44, 2006.
+[7] E. Deelman, J. Blythe, Y. Gil, C. Kesselman, G. Mehta, S. Patil, M.-H. Su, K. Vahi, and M. Livny. Pegasus: Mapping scientific workflows onto the Grid. 2004.
+[8] I. Foster. The anatomy of the Grid: Enabling scalable virtual organizations. pages 6–7, 2001.
+[9] I. Foster. Globus Toolkit version 4: Software for service-oriented systems. pages 2–13, 2005.
+[10] J. Frey, T. Tannenbaum, M. Livny, I. Foster, and S. Tuecke. Condor-G: A computation management agent for multi-institutional grids. Cluster Computing, 5(3):237–246, July 2002.
+[11] M. J. Litzkow, M. Livny, and M. W. Mutka. Condor - a hunter of idle workstations. pages 104–111, 1988.
+[12] C. Mattmann, D. J. Crichton, N. Medvidovic, and S. Hughes. A software architecture-based framework for highly distributed and data intensive scientific applications. In ICSE, pages 721–730, 2006.
+[13] D. Woollard, O. Kwoun, T. Bicknell, S. Dunbar, and K. Leung. A science data system approach for the SMAP mission. In IEEE Radar, 2009.
+[14] D. Woollard, N. Medvidovic, Y. Gil, and C. A. Mattmann. Scientific software as workflows: From discovery to distribution. IEEE Software, 25(4):37–43, 2008.
+[15] J. Yu and R. Buyya. A taxonomy of workflow management systems for grid computing, Apr 2005.
\ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Catalog and Archive File Management Component-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Catalog and Archive File Management Component-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..98281efb2fac7059f9bd7452c63995f31d82f4cf --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Catalog and Archive File Management Component-relation.txt @@ -0,0 +1,109 @@
+Catalog and Archive File Management Component
+Introduction
+Project Description
+Architecture
+Extension Points
+Current Extension Point Implementations
+Use Cases
+Conclusion
+This is the developer guide for the Apache OODT Catalog and Archive Service (CAS) File Manager component, or File Manager for short. Primarily, this guide will explain the File Manager architecture and interfaces, including its tailorable extension points. For information on installation, configuration, and examples, please see our User Guides.
+
+The remainder of this guide is separated into the following sections:
+
+Project Description
+Architecture
+Extension Points
+Current Extension Point Implementations
+Project Description
+The File Manager component is responsible for tracking, ingesting and moving file data and metadata between a client system and a server system. The File Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for file management.
+
+Architecture
+In this section, we will describe the architecture of the File Manager, including its constituent components, object model, and key capabilities.
+
+Components
+The major components of the File Manager are the Client and Server, the Repository Manager, the Catalog, the Validation Layer, the Versioner, and the Transferer. The relationship between all of these components is shown in the diagram below:
+
+File Manager Architecture
+
+The File Manager Server contains both a Repository that manages products (and the products' location in the archive as specified by the Versioner), and a Catalog that validates metadata via the Validation Layer. Transfer of data products from the Client to the Server is the domain of the Transferer and can be initiated at either the Client or the Server.
+
+Object Model
+The critical objects managed by the File Manager include:
+
+Products - Collections of one or more files, and their associated Metadata.
+Metadata - A map of key->multiple values of descriptive information about a Product. See CAS-Metadata for more information on Metadata.
+Reference - A pointer to a Product file's (or files') original location, and to its final resting location within the archive constructed by the File Manager.
+Product Type - Descriptive information about a Product that includes what type of file URI generation scheme to use, the root repository location for a particular Product, and a description of the Product.
+Element - A singular Metadata element, such as "Author", or "Creator". Elements may have additional metadata, in the form of the associated definition and even a corresponding Dublin Core attribute. See CAS-Metadata for more information on Metadata Elements.
+Versioner - A URI generation scheme for Product Types that defines the location within the archive (built by the File Manager) where a file belonging to a Product (that belongs to the associated Product Type) should be placed.
+Each Product contains 1 or more References, and one Metadata object. Each Product is a member of a single Product Type. The Metadata collected for each Product is defined by a mapping of Product Type->1...* Elements. Each Product Type has an associated Versioner. These relationships are shown in the figure below, and are sketched in code after the capability list that follows.
+
+File Manager Object Model
+Key Capabilities
+The File Manager has been designed with a number of key capabilities in mind. These capabilities include:
+
+Easy management of different types of Products. The Repository Manager extension point is responsible for managing Product Types, and their associated information. Management of Product Types includes adding new types, deleting and updating existing types, and retrieving Product Type objects, by their ID or by their name.
+
+Support for different kinds of back end catalogs. The Catalog extension point allows Product instance metadata and file location information to be stored in different types of back end data stores quite easily. Existing implementations of the Catalog interface include a JDBC based back end database, along with a flat-file index powered by Lucene.
+
+Management of Product instance information. Management includes adding, deleting and updating product instance information, including file locations (References), along with Product Metadata. It also includes retrieving Metadata and References associated with existing Products as well as obtaining the Products themselves.
+
+Element management for Metadata. The File Manager's Validation Layer extension point allows for the management of Element policy information in different types of back end stores. For instance, Element policy could be stored in XML files, a Database, or a Metadata Registry.
+
+Data transfer mechanism interface. By having an extension point for Data Transfer, the File Manager can support different Data Transfer protocols, both local and remote.
+
+Advanced support for File Repository layouts. The Versioner extension point allows for different File Repository layouts based on Product Types.
+
+Support for multiple Product structures. The File Manager Client allows for Products to be Flat or Hierarchical. Flat Products are collections of singular files that are aggregated together to make a Product. Hierarchical Products are Products that contain collections of directories, sub-directories, and files.
+
+Design for scalability. The File Manager uses the popular client-server paradigm, allowing new File Manager servers to be instantiated, as needed, without affecting the File Manager clients, and vice-versa.
+
+Standard communication protocols. The File Manager uses XML-RPC as its main external interface between the File Manager client and server. XML-RPC, the little brother of SOAP, is fast, extensible, and uses the underlying HTTP protocol for data transfer.
+
+RSS-based Product syndication. The File Manager web interface allows for the RSS-based syndication of Product feeds based on Product Type.
+
+Data transfer status tracking. The File Manager tracks all current Product and File transfers and even publishes an RSS feed of existing transfers.
+
+This capability set is not exhaustive, and is meant to give the user a feel for what general features are provided by the File Manager. Most likely the user will find that the File Manager provides many other capabilities besides those described here.
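+As promised above, here is a minimal code sketch of the object model. Field and class names are illustrative only; the real classes live in the filemgr structs packages and differ in detail:
+
+// Hypothetical sketch of the File Manager object model described above.
+import java.util.List;
+import java.util.Map;
+
+class Reference { String origUri; String archiveUri; }  // original and final file locations
+class Element { String name; }                           // a single Metadata element, e.g. "Author"
+class Versioner { /* URI generation scheme for the archive layout */ }
+
+class ProductType {
+    String name;
+    String rootRepositoryPath;
+    List<Element> elements;  // Product Type -> 1...* Elements
+    Versioner versioner;     // each Product Type has one Versioner
+}
+
+class Product {
+    ProductType type;                    // each Product belongs to exactly one Product Type
+    List<Reference> references;          // 1 or more References
+    Map<String, List<String>> metadata;  // one Metadata object: key -> multiple values
+}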
+
+Extension Points
+We have constructed the File Manager making use of the factory method pattern to provide multiple extension points for the File Manager. An extension point is an interface within the File Manager that can have many implementations. This is particularly useful when it comes to software component configuration, because it allows different implementations of an existing interface to be selected at deployment time.
+
+The factory method pattern is a creational pattern common to object oriented design. Each File Manager extension point involves the implementation of two interfaces: an extension factory and an extension implementation. At run-time, the File Manager loads a properties file that specifies a factory class to use during extension point instantiation (see the sketch at the end of this guide). For example, the File Manager may communicate with a database-based Catalog and an XML-based Element Store (called a Validation Layer), or it may use a Lucene-based Catalog and a database-based Validation Layer.
+Using extension points, it is fairly simple to support many different types of what are typically referred to as "plug-in architectures." Each of the core extension points for the File Manager is described below:
+
+Catalog The Catalog extension point is responsible for storing all the instance data for Products, Metadata, and file References. Additionally, the Catalog provides a query capability for Products.
+Data Transfer The Data Transfer extension point allows for the movement of a Product to and from the archive managed by the File Manager component. Different protocols for Data Transfer may include local (disk-based) copy, or remote XML-RPC based transfer across networked machines.
+Repository Manager The Repository Manager extension point provides a means for managing all of the policy information (i.e., the Product Types and their associated information) for Products managed by the File Manager.
+Validation Layer The Validation Layer extension point allows for the querying of element definitions associated with a particular Product Type. The extension point also maps Product Types to Elements.
+Versioning The Versioning extension point allows for the definition of different URI generation schemes that define the final resting location of files for a particular Product.
+System The extension point that provides the external interface to the File Manager services. This includes the File Manager server interface, as well as the associated File Manager client interface that communicates with the server.
+Current Extension Point Implementations
+There are at least two implementations of each of the aforementioned extension points for the File Manager. Each extension point implementation is detailed in this section.
+
+Catalog
+Data Source based Catalog. An implementation of the Catalog extension point interface that uses a JDBC accessible database backend.
+Lucene based Catalog. An implementation of the Catalog extension point interface that uses the Lucene free text index system to store Product instance information.
+Data Transfer
+Local Data Transfer. An implementation of the Data Transfer interface that uses Apache's commons-io to perform local, disk based filesystem data transfer. This implementation also supports locally accessible Network File System (NFS) disks.
+Remote Data Transfer. An implementation of the Data Transfer interface that uses the XML-RPC File Manager client to transfer files to a remote XML-RPC File Manager server.
+InPlace Data Transfer.
An implementation of the Data Transfer interface that avoids transferring any products; this can be used in situations where metadata about a particular product should be recorded, but no physical transfer needs to occur.
+Repository Manager
+Data Source based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in a JDBC accessible database.
+XML based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in an XML file called product-types.xml.
+Validation Layer
+Data Source based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in a JDBC accessible database.
+XML based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in two XML files called elements.xml and product-type-element-map.xml.
+System (File Manager client and File Manager server)
+XML-RPC based File Manager server. An implementation of the external server interface for the File Manager that uses XML-RPC as the transportation medium.
+XML-RPC based File Manager client. An implementation of the client interface for the XML-RPC File Manager server that uses XML-RPC as the transportation medium.
+Use Cases
+The File Manager was built to support several of the capabilities outlined above. In particular there were several use cases that we wanted to support, some of which are described below.
+
+File Manager Ingest Use Case
+The red numbers in the above Figure correspond to the sequence of steps, and the series of interactions between the different File Manager extension points, that occur in order to perform the file ingestion activity. In Step 1, a File Manager client is invoked for the ingest operation, which sends the Metadata and References for a particular Product to ingest to the File Manager server's System Interface extension point. The System Interface uses the information about Product Type policy made available by the Repository Manager in order to understand whether or not the product should be transferred, where its root repository path should be, and so on. The System Interface then catalogs the file References and Metadata using the Catalog extension point. During this catalog process, the Catalog extension point uses the Validation Layer to determine which Elements should be extracted for the particular Product, based upon its Product Type. After that, Data Transfer is initiated at either the client or server end, and the first step of Data Transfer is to use the Product's associated Versioner to generate final file References. After final file References have been determined, the file data is transferred by the server or by the client, using the Data Transfer extension point.
+
+Conclusion
+The aim of this document is to provide information relevant to developers about the CAS File Manager. Specifically, this document has described the File Manager's architecture, including its constituent components, object model and key capabilities. Additionally, this document provides an overview of the current implementations of the File Manager's extension points.
+
+In the Basic User Guide and Advanced User Guide, we will cover topics like installation, configuration, and example uses, as well as advanced topics like scaling and other tips and tricks.
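+As referenced in the Extension Points section, here is a minimal sketch of factory-based extension point loading. The property key, interface names and factory class are illustrative assumptions, not the exact OODT API:
+
+// Hypothetical sketch: select a Catalog implementation at deployment time
+// by naming its factory class in a properties file.
+import java.io.FileInputStream;
+import java.util.Properties;
+
+interface Catalog { /* addProduct, query, ... */ }
+interface CatalogFactory { Catalog createCatalog(); }
+
+public class ExtensionPointLoader {
+    public static Catalog loadCatalog(String propFilePath) throws Exception {
+        Properties props = new Properties();
+        try (FileInputStream in = new FileInputStream(propFilePath)) {
+            props.load(in);
+        }
+        // e.g. filemgr.catalog.factory=org.example.LuceneCatalogFactory
+        String factoryClass = props.getProperty("filemgr.catalog.factory");
+        // Instantiate the factory reflectively, then ask it for the extension
+        // implementation; the caller never names a concrete Catalog class.
+        CatalogFactory factory = (CatalogFactory) Class.forName(factoryClass)
+                .getDeclaredConstructor().newInstance();
+        return factory.createCatalog();
+    }
+}
+
+Swapping the Lucene-based Catalog for the JDBC-based one is then a one-line properties change, with no recompilation of the File Manager.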
\ No newline at end of file
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/File Manager Scale Out Planning - OODT - Apache Software Foundation-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/File Manager Scale Out Planning - OODT - Apache Software Foundation-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d392f092f1e1d48e7df22228826768f1c7e62aa --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/File Manager Scale Out Planning - OODT - Apache Software Foundation-relation.txt @@ -0,0 +1,46 @@
+
+Created by Tom Barber, last modified by Lewis John McGibbney on March 23, 2016
+Goals
+Background and strategic fit
+Assumptions
+Requirements
+User interaction and design
+Phase 1
+Phase 2
+Questions
+Not Doing
+Target release: 0.13
+Document status: DRAFT
+Document owner: Tom Barber
+
+Goals
+Allow for OODT File Manager linear scaling, distribution, and seamless querying of multiple file managers on separate networks.
+Background and strategic fit
+To truly fit the "Distributed" aspect of OODT, the file manager component should allow for multiple file managers to be started, allowing both local and remote querying of the file managers seamlessly. For example, if I have an OODT installation in the UK and an OODT installation on a separate site in Australia, it makes more sense from a data transfer and performance perspective to allow those two FMs to operate independently of each other, but allow for querying of both repositories as if they were one, so I could choose to retrieve information from the Australian FM. This also allows users to say "give me all Excel files from all my sites", without having to point fm-client at all the different instances.
+
+Assumptions
+Requirements
+Multiple file manager configuration and monitoring via PCS: Developers should be able to define multiple file managers in the PCS so they are exposed to services like OPSUI.
+Query execution location: A user using a file manager client should be able to tell the client to search only the local file manager, a specific remote file manager, or every available file manager for data.
+Coping with broken communications: The File Manager should be able to cope with remote nodes going offline or becoming unavailable, and fail gracefully.
+Schema alignment/matching: A user wishes to execute a query, without knowing the underlying data model, on more than one file manager server. The query should be executed regardless and the relevant content returned. (Must Have)
+User interaction and design
+If you use the filemgr client, then a very simplistic implementation would be to extend the service to allow definitions of multiple local and remote file managers and allow the user to execute a query over each file manager and concatenate the results (see the sketch after the Phase 2 description below). This would be very quick to implement but doesn't scale well or support other services.
+
+Phase 1
+Currently OPSUI and the PCS platform only allow you to define one file manager per OPSUI instance, but to implement this feature successfully the OPSUI configuration needs to allow for multiple file managers, and when a user looks at the summary page the summary should show a grouping of all the available file managers, whilst also allowing the ability to filter by file manager.
+
+In summary the following changes will be made:
+
+Registration of multiple file managers within PCS/OPSUI
+Changes made to the OPSUI monitoring pages to reflect multiple file managers
+Changes made to the Product listing pages to allow display of content from multiple file managers and filtering by file manager
+Additional configuration within the filemgr client to allow for lookup of available file managers from PCS and querying of them.
+Phase 2
+Phase 2 of the enhancement would involve adding further distributed capabilities to the file manager (and possibly the PCS platform as a whole), by adding in an optional ZooKeeper configuration that would allow for nodes self-registering, graceful handling of nodes disappearing, leader election, and so forth. I feel enhancing OODT with industry-standard distributed configuration management that is already widely used in "Big Data" type deployments will help with scalability of the platform and resiliency over distributed locations.
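+As referenced under "User interaction and design" above, here is a minimal sketch of the simplistic fan-out approach. FileManagerClient and its query method are hypothetical stand-ins, not the real filemgr client API:
+
+// Hypothetical sketch: query each configured file manager, local or remote,
+// and concatenate the results into a single response.
+import java.util.ArrayList;
+import java.util.List;
+
+interface FileManagerClient {
+    List<String> query(String q) throws Exception;  // returns matching product ids
+}
+
+public class FederatedQuery {
+    private final List<FileManagerClient> fileManagers;  // local + remote FMs
+
+    public FederatedQuery(List<FileManagerClient> fileManagers) {
+        this.fileManagers = fileManagers;
+    }
+
+    public List<String> queryAll(String q) {
+        List<String> results = new ArrayList<>();
+        for (FileManagerClient fm : fileManagers) {
+            try {
+                results.addAll(fm.query(q));  // concatenate per-FM results
+            } catch (Exception offline) {
+                // Per the "Coping with broken communications" requirement:
+                // skip unreachable nodes and fail gracefully.
+            }
+        }
+        return results;
+    }
+}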
\ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..64874cd0ab7710c92ba8961cff86e020932c7839 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-relation.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description +org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation +org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-simEnts.txt new file mode 100644 index 0000000000000000000000000000000000000000..64874cd0ab7710c92ba8961cff86e020932c7839 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester-simEnts.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description +org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation +org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester.txt new file mode 100644 index 0000000000000000000000000000000000000000..64874cd0ab7710c92ba8961cff86e020932c7839 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Interface Ingester.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description 
+org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation +org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..67416259f0fde4ffff53c54c973b52b030fce5e7 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-relation.txt @@ -0,0 +1,141 @@ +HAL Id: hal-01513774 +https://hal.inria.fr/hal-01513774 +Submitted on 25 Apr 2017 +HAL is a multi-disciplinary open access archive for the deposit and dissemination of scientific research documents, whether they are published or not. The documents may come from teaching and research institutions in France or abroad, or from public or private research centers. +Distributed under a Creative Commons Attribution 4.0 International License +Mahasen: Distributed Storage Resource Broker +K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, Srinath Perera +To cite this version: +K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, et al.: Mahasen: Distributed Storage Resource Broker. 10th International Conference on Network and Parallel Computing (NPC), Sep 2013, Guiyang, China. pp. 380-392, 10.1007/978-3-642-40820-5_32. hal-01513774 +Mahasen: Distributed Storage Resource Broker +K.D.A.K.S. Perera1, T. Kishanthan1, H.A.S. Perera1, D.T.H.V. Madola1, Malaka Walpola1, Srinath Perera2 +1 Computer Science and Engineering Department, University of Moratuwa, Sri Lanka. {shelanrc, kshanth2101, ashansa.perera, hirunimadola, malaka.uom}@gmail.com +2 WSO2 Lanka, No 59, Flower Road, Colombo 07, Sri Lanka +srinath@wso2.com +Abstract. Modern day systems are facing an avalanche of data, and they are being forced to handle more and more data intensive use cases. These data come in many forms and shapes: sensors (RFID, Near Field Communication, weather sensors), transaction logs, the Web, social networks, etc. As an example, weather sensors across the world generate a large amount of data throughout the year. Handling these and similar data requires scalable, efficient, reliable and very large storage with support for efficient metadata based searching.
This paper presents Mahasen, a highly scalable storage for high volume data intensive applications built on top of a peer-to-peer layer. In addition to scalable storage, Mahasen also supports efficient searching, built on top of a Distributed Hash Table (DHT). +1 Introduction +Currently, the United States collects weather data from many sources such as Doppler readers deployed across the country, aircraft, mobile towers, balloons, etc. These sensors keep generating a sizable amount of data. Processing them efficiently as needed is pushing our understanding of large-scale data processing to its limits. +Among the many challenges data poses, a prominent one is storing the data and indexing it so that scientists and researchers can ask for a specific type of data collected at a given time and in a given region. For example, a scientist may want to search for all Automated Weather data items collected in the Bloomington area on June 15 between 8am and 12pm. +Although we have presented meteorology as an example, there are many similar use cases. For instance, Sky Server [1] is one of the best examples that illustrate the use case of large data generation. This project expects to collect 40 terabytes of data in five years. In its data collection, the photometric catalog is expected to contain about 500 distinct attributes for each of one hundred million galaxies, one hundred million stars, and one million quasars. Similarly, many sciences, analytic processing organizations, data mining use cases, etc., would want to store large amounts of data and process them later in a selective manner. These systems often store data as files, +and there have been several efforts to build large scale metadata catalogs [2][3] and storage solutions [4][5] to support storing and searching those data items. One such example is the AMGA metadata catalog [6], which was an effort to build a replication and distribution mechanism for metadata catalogs. +As we discuss in the related work section, most of the metadata catalog implementations use centralized architectures and therefore have limited scalability, unlike Mahasen. For example, Nirvana Storage [7] has a centralized metadata catalog which only supports scalability through vendor mechanisms such as Oracle Real Application Clusters. XML Metadata Concept Catalog (XMC Cat) [8] is another centralized metadata catalog, which stores hierarchical rich metadata. This paper presents Mahasen, a scalable metadata catalog and storage server built on top of P2P technology. Further, it is built by distributing an open source centralized data registry (WSO2 Registry). +Mahasen (Distributed Storage Resource Broker) is a Data Grid Management System (DGMS) that can manage a large volume of distributed data. It targets high volume data intensive applications. The architecture of Mahasen has been designed to present a single global logical namespace across all the stored data, and it maintains a metadata structure which can be used to search files based on their attributes. It is a network of storage servers that plays the dual purpose of a metadata catalog and a storage server. Mahasen will solve the huge data storage problem and fault tolerance in data intensive computing by aggregating low cost hardware while having both metadata and actual resources distributed without a single point of failure. Metadata management will ensure the capability of searching files based on attributes of the stored resources. Mahasen has a metadata catalog which is highly distributed and scalable.
The metadata layer ensures fault tolerance by keeping replicas of metadata. +The rest of the paper is organized as follows. The next section discusses the related work in metadata catalogs and storage servers while comparing and contrasting them with Mahasen. The following section discusses the Mahasen architecture. The next section presents the performance evaluation of Mahasen. Finally, the discussion section discusses limitations, other potential solutions and directions. +2 Related Work +2.1 Nirvana Storage +Nirvana SRB [7] is a middleware system that federates large heterogeneous data resources distributed across a network. The ability to access, manage, search and organize data across the entire SRB Federation is provided via a Global Namespace. MCAT is the centralized metadata repository which maintains two types of records – system- and user-metadata. Scalability of MCAT is achieved using database vendors' mechanisms [9], hence it is limited by relational DB scalability limits. +Storage/Replication. The stored resources are divided into Physical resources, Logical resources and Cluster resources. Replication of resources across multiple servers ensures the availability and recoverability of resources during failovers. +Retrieve. Data stream routing is handled by SRB and TCP/IP, making the data transfer process transparent to the users. +Search. Searching is done based on metadata attributes which are extracted and managed by the SRB. +Add/Update. Data can be added in two ways: Registration and Ingestion. Registration does not transfer any data but only creates a pointer to the data in MCAT. Ingestion is similar to registration but also transfers the data to an SRB storage resource. +Delete. If a file shadow object is used as a data object to ingest a file resource into SRB, then the file will be removed from MCAT but not from the physical location. +2.2 Apache OODT +OODT [10] is a middleware system for metadata that provides transparent access to the resources. It facilitates functionalities such as storing, retrieving, searching and analyzing distributed data, objects and databases jointly. OODT provides a product service and a profile service, which manage data and metadata respectively. +Storage/Replication. OODT stores data products in file-based storage in a distributed manner. It classifies storage into three categories: on-line, near-line or off-line storage. +Retrieve. When OODT receives a request for retrieving a file, it issues a profile query to a product server that helps in resolving resources that could provide the data. The response will include the target product server address in the form of a URI. OODT then issues a product query based on the profile query results to get the data, and it will actually retrieve the data from the product server in a MIME-compliant format. +Search. OODT uses the profile server and the product server for searching the metadata and retrieving the products, and it has multiple servers of each type. OODT is based on a client-server architecture and promotes a REST-style architectural pattern for searching and retrieving data. The profile or a subset of the profile is returned for retrieval. +Add/Update. OODT provides data management, including managing files and folders, through the implementation of the javax.sql.DataSource interface. +Delete. The file management component of the Catalog and Archive Service supports the deletion of resource files and metadata through the implementation of the javax.sql.DataSource interface.
+2.3 WSO2 Governance Registry +WSO2 Governance Registry [11] is a repository that allows users to store resources in a tree-structured manner, just like with a file system. However, unlike a file system, users may annotate resources with custom properties, and WSO2 Registry also has built-in metadata management features like tagging and associating resources. +However, the WSO2 registry is backed by a relational database system, and it uses database features to store data and metadata, to manage them, and to search them. Hence it has a centralized architecture. Mahasen extends that architecture to a distributed architecture. +Replication. There is no inbuilt mechanism for replicating resources in the WSO2 registry. +Search. The WSO2 registry provides two types of searches. One is searching for a resource by its name, metadata, etc., and it is implemented using the underlying relational database system. The second one is searching the content of resources, implemented using Lucene [12]. The second search is only applicable to resources with textual content. +Add/Update. Resources can be added to the registry in two ways. The first is adding via the web interface provided by the registry. When adding a new resource, it is also possible to add additional metadata such as tags and properties (name-value pairs), which will later be useful for searching for that resource. The other way to add resources is to extend the registry API and expose it as a web service. +The major limitation of the registry when storing resources is the amount of memory available. Since it uses the Java heap memory to buffer resources before storing them, large files cannot be stored, as the available memory is limited to a few hundred megabytes. +2.4 Hadoop Distributed File System +The Apache Hadoop Distributed File System (HDFS) [13] is a file system designed to run on commodity hardware. HDFS has a master-slave architecture that consists of a single NameNode as master and a number of DataNodes. The NameNode is responsible for regulating access to files by clients and managing the namespace of the file system. Generally DataNodes are deployed one per node in the cluster, and each is responsible for managing the storage attached to its node. +Storage / Replication. Hadoop supports hierarchical file organization where users can create directories and store files. It splits each file into chunks with a default size of 64MB and stores them as a sequence of blocks, and those blocks are stored in the underlying file system of the DataNodes. The blocks are replicated for fault tolerance, and the block size and the replication factor of the data are configurable. +Retrieve. Applications that run on HDFS need streaming access to their data sets. DataNodes are responsible for the read requests issued by a user to retrieve data from the system. +Search. The Hadoop Distributed File System does not provide a comprehensive search for users or applications; it just fulfills the requirement of a distributed file system by supporting locating the physical location of a file using the system-specific metadata. +Add/Update. Writing to HDFS is done by creating a new file and writing data to it. Hadoop addresses a single-writer, multiple-readers model. Once the data is written and the file is closed, one cannot remove or alter the data. Data can be added to the file by reopening the file and appending new data. +Delete.
When a file is deleted by a user or from an application, the particular resource is not immediately removed from HDFS. The resource is renamed and copied into the /trash directory, giving the possibility to restore it as long as it remains in the trash. +Mahasen's main differentiation from the above systems comes from its scalability. It can scale significantly better than Nirvana Storage, which depends on relational databases to scale the system, since the Mahasen metadata layer is natively distributed using a DHT. WSO2 Registry provides clustering as the scalability option, but it is not optimized for large file transfers and storage, as it uses ATOM-based resource transfers. Furthermore, Mahasen provides users a comprehensive metadata model for managing the distributed resources they store with user-defined metadata, unlike HDFS, which only focuses on creating a distributed file system. Further, Mahasen's metadata layer is natively distributed and fault tolerant, while HDFS has a single NameNode which can be made fault tolerant only with an active-passive failover configuration. +3 High Level Architecture +3.1 Mahasen High Level Architecture +As shown by Figure 1, Mahasen consists of several storage nodes which are connected as peers to a logical ring via FreePastry. Each node consists of a registry to store +metadata and a file system to store physical file parts. Once connected to the ring, each node contributes to the metadata space as well as file storage capacity, scaling the system dynamically with new node additions. Nodes use the underlying DHT (FreePastry) routing protocol to communicate efficiently with each other. +Fig. 1. Mahasen High Level Architecture +Mahasen uses the WSO2 registry and the file system in each node, and a DHT-based architecture is used to connect the nodes into one unit. +Mahasen has a distributed metadata layer that stores data about the distributed files in the Mahasen peer-to-peer network. The metadata catalog is used to broker the stored resources in the network and to assist the user in locating files in the Mahasen distributed environment, abstracting the metadata management from the user. +Mahasen stores two main types of metadata: system-defined metadata and user-defined (descriptive) metadata. System-defined metadata is mainly used for server-side resource handling. File name, file size and the stored node IPs of a file are examples of system-defined metadata. User-defined metadata is used to provide users with searching capability over that metadata. Users can add tags and properties (name-value pairs) to the files that are uploaded. +Fig. 2. Metadata Object Structure of Mahasen +When a file is uploaded by connecting to a Mahasen node, the file will be temporarily saved on that node. The node will then act as the master node, split the file into chunks of a pre-defined size, and store the split parts in a selected set of neighborhood nodes of the master node through parallel transfer. The metadata object created by the master node will then be stored with replicas using the PAST storage implementation of FreePastry. We have rewritten the PAST node's persistent storage such that the data will be stored in the WSO2 registry on that node. +After storing the metadata, the nodes that received file parts act as worker nodes and replicate their file parts in parallel according to the replicate request issued by the master node.
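The metadata object structure of Fig. 2 is easier to see in code. The paper does not give field names, so the following MahasenMetadata class is a purely hypothetical model of the two metadata kinds described above (system-defined fields maintained by the server, plus user-defined tags and name-value properties that drive search):

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class MahasenMetadata {
    // System-defined metadata, maintained by the master/worker nodes.
    public String fileName;
    public long fileSizeBytes;
    // Part index -> IPs of the nodes holding replicas of that part;
    // worker nodes update this after replicating their parts.
    public Map<Integer, List<String>> partLocations = new HashMap<Integer, List<String>>();

    // User-defined (descriptive) metadata, used for searching.
    public Set<String> tags = new HashSet<String>();
    public Map<String, String> properties = new HashMap<String, String>();
}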
Each worker node will update the metadata object with the stored locations of the file parts it replicated. This requires concurrent access to the metadata objects, which Mahasen handles using the locking system provided by the lock manager of the DHT. +A user can request to download a file from any Mahasen node, and the node will first generate the resource ID for the requested file and retrieve the metadata object. Then it extracts from the metadata object the locations of the Mahasen nodes that contain the file parts and retrieves those parts to the local machine. After all the parts are retrieved, they will be merged to recreate the original file, and the file will be streamed to the user. +Deletion can be performed with a single command across a heterogeneous storage system. When a delete request for a file is issued, by following the same method as retrieving the file, Mahasen finds the nodes that store parts of the file and deletes them. Finally, the metadata object will also be deleted along with its replicas. +When a user needs to update the user-defined metadata, the node that receives the update request retrieves the metadata object for the file from the DHT, updates it, and stores it back in the DHT. +Using this model, Mahasen has built a completely decentralized metadata system that handles metadata management in a highly scalable and efficient manner. +Mahasen keeps replicas of both actual files and metadata objects. The main purpose of keeping replicas is fault tolerance and failover recovery. We ensure the high availability of metadata while ensuring scalability using FreePastry's underlying DHT. +3.2 Mahasen Search +As the amount of data in the system grows, the complexity of search increases. Mahasen builds a distributed data structure using the underlying DHT, which can improve the performance of the different search options that Mahasen supports. +The resources in Mahasen are associated with metadata, and for each tag or property in the system we maintain an index pointing to all resources which have that tag or property. This is implemented as a TreeMap [16], and the property trees are stored in the DHT, which handles their replicas. +Fig. 3. A Property Tree Stored in Mahasen Memory Storage +When a user sends a search request, Mahasen extracts the requested search and initiates the execution of the relevant search method. Then the resource IDs of the files which match the given input are retrieved from the relevant property tree. Extracting the relevant resource IDs is done as follows. +Users can send search requests to any Mahasen node, and when a node receives a search request, Mahasen takes the property name given by the client and generates the property tree ID for that property. If the current node has the index for the property, it retrieves the matching resource IDs for that property and sends them to the client. If not, the node acts as a master node, gets the node handles of the nodes which hold the specific property tree, and routes Mahasen search messages with the required parameters to those node handles. Those node handles will then get the relevant resource IDs from the property trees in their memory storage and send them back to the master node. +The property values in the property tree are sorted, so if the search is a range-based search, we can simply take the sub-map between the initial and final property values and retrieve the set of resource IDs mapped to each node in the sub-tree.
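The sub-map operation just described maps directly onto java.util.TreeMap. The paper gives no code, so this is a minimal sketch in which a property tree maps each sorted property value to the set of resource IDs carrying that value:

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

public class PropertyTreeRangeSearch {
    // Property tree: sorted property value -> IDs of resources having that value.
    private final TreeMap<String, Set<String>> propertyTree = new TreeMap<String, Set<String>>();

    public void index(String propertyValue, String resourceId) {
        Set<String> ids = propertyTree.get(propertyValue);
        if (ids == null) {
            ids = new HashSet<String>();
            propertyTree.put(propertyValue, ids);
        }
        ids.add(resourceId);
    }

    /** Range search: union the resource ID sets in the sub-map between lo and hi (inclusive). */
    public Set<String> rangeSearch(String lo, String hi) {
        Set<String> result = new HashSet<String>();
        for (Map.Entry<String, Set<String>> e : propertyTree.subMap(lo, true, hi, true).entrySet()) {
            result.addAll(e.getValue());
        }
        return result;
    }
}

Resolving the returned resource IDs to file names still requires the metadata-object lookups described next, which is why that step dominates the cost.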
Since these resource IDs represent the files having the given property values, Mahasen can look up the metadata objects with those resource IDs and extract the file names to present to the user. The operation of extracting the file names for the resource IDs has a higher cost than extracting the matching resource IDs for the given search query. +The complete data structure built for Mahasen can support property-based search, range-based search, tag-based search and Boolean operations over the properties, such as AND and OR. The advanced search provided by Mahasen is capable of searching based on a set of different properties and tags. +Mahasen Search utilizes the continuation model supported by FreePastry for retrieving and transferring results. When a search request is issued, the application sends requests to look up the node handles which contain the particular TreeMap object and requests results from them. The application will then collect the first incoming result and resume from the previous execution point. +3.3 File Handling +File Transfer. Mahasen is a network of storage nodes, and users are given a client, the Mahasen Client, to access and transfer files to the network. The Mahasen Client, which is built using the Apache HttpClient [17], uses HTTP methods for transferring files to the network. First the client initiates a connection with one of the nodes in the network. An authenticated client is capable of uploading, downloading, deleting, updating or searching for files in the network. The file content will be added as an entity to an HTTP POST request and streamed to the target address. The receiving end will read the file stream and write it to the repository. +Replica Management. To achieve fault tolerance and failover recovery, the file will be split into a set of predefined chunks, and each part will be replicated and stored on different nodes according to a predefined replication factor. The placement of replicas is a critical aspect which affects the reliability and performance of the system. The purpose of having a policy for the placement of replicas is data reliability, availability, and network bandwidth utilization. The current policy of Mahasen is to store the replicated files in the leaf set of the initial node. The selection of nodes in the leaf set is calculated using a cost evaluation function which focuses on the distance of the node. +After successfully transferring the file to the initial node, the client will be notified about the status of the file transfer, and the initial node will then replicate and transfer the file to other nodes. The number of copies kept for a file is called the replication factor of that file and will be decided by the Mahasen system. +File Splitting and Parallel Transfer. The Mahasen storage network is designed to store large files reliably across distributed nodes. When storing a file, it will be split into blocks of fixed size and these blocks will be replicated across the network for fault tolerance. The transfer of replicated file blocks to other nodes is done in parallel in order to utilize the bandwidth and save time. +When retrieving a file using the metadata object, the system will select a node which is closest to the reader node and download the blocks to the client. Downloading of file blocks is also done in parallel, and the blocks are then merged to create the complete file.
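The split-replicate-transfer flow of section 3.3 can be sketched with plain JDK concurrency primitives. The chunk size, the NodeClient interface and the round-robin placement below are hypothetical stand-ins for Mahasen's actual transfer layer and cost-based node selection:

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class FileSplitter {
    private static final int CHUNK_SIZE = 64 * 1024 * 1024; // hypothetical fixed block size

    /** Stand-in for Mahasen's node-to-node transfer layer. */
    public interface NodeClient {
        void send(String fileName, int partIndex, byte[] part) throws IOException;
    }

    /** Splits a file into fixed-size blocks and pushes each block to a target node in parallel. */
    public static void splitAndTransfer(Path file, List<NodeClient> targets) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(targets.size());
        List<Future<?>> pending = new ArrayList<Future<?>>();
        try (InputStream in = Files.newInputStream(file)) {
            byte[] buf = new byte[CHUNK_SIZE];
            int n;
            int part = 0;
            while ((n = in.read(buf)) > 0) {
                final byte[] block = Arrays.copyOf(buf, n); // blocks are at most CHUNK_SIZE bytes
                final int idx = part;
                final NodeClient target = targets.get(part % targets.size()); // round-robin placement
                pending.add(pool.submit(() -> {
                    target.send(file.getFileName().toString(), idx, block);
                    return null;
                }));
                part++;
            }
        }
        for (Future<?> f : pending) {
            f.get(); // surface any transfer failure
        }
        pool.shutdown();
    }
}

Downloading mirrors this: fetch the part list from the metadata object, pull the blocks in parallel, and merge them in part-index order.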
+3.4 Mahasen API +Mahasen provides a complete API to perform CRUD operations and search. Users can develop external clients apart from the default client Mahasen provides and integrate them with existing systems to perform resource management and search operations. +4 Performance Analysis +Mahasen system scalability was tested by running a system with M nodes and N parallel clients, where M was 1, 6, 12, 18, 24 and N was 1, 5, 10, 15, 20. Each client carried out upload, download, delete and search operations 10 times and the average was taken. The system configuration used in this test was: two machines with Intel(R) Xeon(R) E5-2403 1.80GHz 4-core CPUs and 24GB RAM, and one machine with an Intel(R) Xeon(R) E5-2470 2.30GHz 8-core CPU and 63GB RAM. The following figures (4 to 7) depict the results of this test. In the upload test, 500MB files were used by each client. +Fig. 4. Upload test results +In the results it is observed that as the number of clients increases, the upload time also increases. We believe that this is due to network congestion and the background processes of data replication across nodes. When the number of nodes increased to 18 or 24, a reduction in upload time was observed. This was expected behaviour, because the node which the client selects for upload distributes the replica management tasks to other nodes in the p2p ring. +Fig. 5. Download test results +When downloading files using the Mahasen client, it is observed that as the number of clients increases, the single-node setup shows a significant growth in download time. In the performance test, a single node was chosen to receive the client request while it coordinates the file transfer from other nodes in the setup. Therefore, when there are multiple nodes in the system, file parts can be downloaded from other available nodes, which reduces the download time. +Fig. 6. Delete test results +When Mahasen performs a delete on a resource, it involves 3 operations: deleting the metadata, deleting entries from the search index, and deleting the physical file. When more nodes are in the system, each node can participate in deleting its own files in parallel, making the system more scalable and efficient. +Fig. 7. Search test results +Search results illustrate that Mahasen can perform well even with more nodes added to the system. Usually a single node should have the lowest possible time, as it does not have to search across the p2p ring. But with multiple nodes, it has to aggregate results and present them to the client. It can be observed from the figure that when more clients are in the system, results tend to converge to a lower value due to caching, as we issued the search operations through the same node. +5 Discussion and Future Work +Mahasen provides a highly scalable metadata structure with its peer-to-peer architecture in the metadata catalog. Unlike the existing metadata catalogs that use centralized architectures, Mahasen distributes metadata across the nodes in the system with replication, making the overall system scalable and fault tolerant. +Mahasen keeps replicas of metadata objects and property trees as well. The DHT of FreePastry is used to store these objects in the system, which provides easy access to them. Keeping replicas of metadata objects and property tree objects does not cost as much as keeping replicas of actual files, which are very large compared to metadata and property tree objects.
By having these objects with replicas in the system, Mahasen has been able to ensure the correct functioning of many of the Mahasen operations even under conditions like node failures. +An important contribution of Mahasen is developing a distributed indexing structure on top of the DHT for searching data products using the different properties associated with them. Since Mahasen needed to support range-based queries, we evaluated earlier efforts to build such index structures. Skip Tree Graph [18] was one of the best candidates we selected for a search-assisting data structure, as it can efficiently support range-based queries over a DHT. However, since we had different properties and the data structure had to grow in two dimensions, one in the number of properties and the other in the number of entries per property, we would have been forced to create different DHTs for different properties. Therefore we needed a much less complex +solution, since maintaining different DHTs could have been very expensive in terms of resources. +When the system scales up to a large number of nodes, it becomes more costly to issue a search operation over the raw metadata stored. Therefore Mahasen developed a combined data structure with the DHT and TreeMap, as explained earlier. +When a Mahasen node fails and this is detected by the existing nodes in the network, Mahasen replicates all the metadata objects and property tree objects which were on the failed node to existing Mahasen nodes, reading them from other replicas. Mahasen preserves the availability of metadata objects and property tree objects by keeping their replication factor constant. +The current Mahasen design has several limitations, which we plan to address as future work. Currently Mahasen stores each property index on one Mahasen node and assumes that it will fit within the memory of that node. This may not be a major concern for simple cases, and even NoSQL storages like Cassandra make similar assumptions. Dividing the property tree into parts and storing them on different nodes when it grows larger than a given size can solve this problem. We can predefine the maximum size of a part that will reside on one node. +Another challenge is that a search based on multiple properties, where at least one is a common property, would force Mahasen to join large data sets; one potential solution is to negotiate the size of the data sets before starting the data merge. +To summarize, the Mahasen project builds a scalable storage solution by making a group of existing open source registries work as one unit. It provides a single logical global namespace, and users may talk to any node of the group and perform any operation. +Mahasen connects nodes (registries) using PAST, a storage overlay implemented on top of the Pastry DHT algorithm. Furthermore, Mahasen builds a distributed indexing structure on top of the DHT to support property-based search of data items. +A user can benefit from the provided Web Service API and effectively utilize it for batch file-uploading tasks through a custom client or the basic client provided by Mahasen. +References +1. Szalay, A.S., Kunszt, P.Z., Thakar, A., Gray, J., Slutz, D., Brunner, R.J.: Designing and Mining Multi-Terabyte Astronomy Archives: The Sloan Digital Sky Survey. In: SIGMOD '00: Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data (2000) +2. Baru, C., Moore, R., Rajasekar, A., Wan, M.: The SDSC Storage Resource Broker (1998) +3.
Moore, R.W.: Managing Large Distributed Data Sets using the Storage Resource Broker (2010) +4. DeCandia, G., Hastorun, D., Jampani, M., et al.: Dynamo: Amazon's Highly Available Key-value Store (2010) +5. Ghemawat, S., Gobioff, H., Leung, S.-T.: The Google File System. +6. Koblitz, B., Santos, N.: Distributed Metadata with the AMGA Metadata Catalog. +7. Nirvana Storage - Home of the Storage Resource Broker (SRB®), http://www.nirvanastorage.com/index.php?module=htmlpages&func=display&pid=1 (2011) +8. XML Metadata Concept Catalog (XMC Cat), Data to Insight Center, Indiana University Pervasive Technology Institute, http://d2i.indiana.edu/xmccat. +9. Nirvana Performance, http://www.nirvanastorage.com/index.php?module=htmlpages&func=display&pid=54. +10. Apache™ OODT, http://oodt.apache.org/ (2011) +11. WSO2 Governance Registry - lean.enterprise.middleware - open source SOA | WSO2, http://wso2.com/products/governance-registry/ (2011) +12. Apache Lucene - Overview, http://lucene.apache.org/java/docs/index.html. +13. HDFS Architecture Guide, http://hadoop.apache.org/docs/r1.0.4/hdfs_design.html (2011) +14. Pastry - A scalable, decentralized, self-organizing and fault-tolerant substrate for peer-to-peer applications, http://www.freepastry.org/. +15. Druschel, P., Rowstron, A.: PAST: A large-scale, persistent peer-to-peer storage utility. In: HotOS VIII, Schloss Elmau, Germany (2001) +16. TreeMap (Java 2 Platform SE 5.0), http://download.oracle.com/javase/1.5.0/docs/api/java/util/TreeMap.html (2011) +17. HttpClient - HttpComponents HttpClient Overview, http://hc.apache.org/httpcomponents-client-ga/ (2011) +18. González Beltrán, A., Sage, P., Milligan, P.: Skip Tree Graph: a Distributed and Balanced Search Tree for Peer-to-Peer Networks. \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-simEnts.txt new file mode 100644 index 0000000000000000000000000000000000000000..67416259f0fde4ffff53c54c973b52b030fce5e7 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker-simEnts.txt @@ -0,0 +1,141 @@ +HAL Id: hal-01513774 +https://hal.inria.fr/hal-01513774 +Submitted on 25 Apr 2017 +HAL is a multi-disciplinary open access archive for the deposit and dissemination of scientific research documents, whether they are published or not. The documents may come from teaching and research institutions in France or abroad, or from public or private research centers. +Distributed under a Creative Commons Attribution 4.0 International License +Mahasen: Distributed Storage Resource Broker +K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, Srinath Perera +To cite this version:
+K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, et al.: Mahasen: Distributed Storage Resource Broker. 10th International Conference on Network and Parallel Computing (NPC), Sep 2013, Guiyang, China. pp. 380-392, 10.1007/978-3-642-40820-5_32. hal-01513774 +Mahasen: Distributed Storage Resource Broker +K.D.A.K.S. Perera1, T. Kishanthan1, H.A.S. Perera1, D.T.H.V. Madola1, Malaka Walpola1, Srinath Perera2 +1 Computer Science and Engineering Department, University of Moratuwa, Sri Lanka. {shelanrc, kshanth2101, ashansa.perera, hirunimadola, malaka.uom}@gmail.com +2 WSO2 Lanka, No 59, Flower Road, Colombo 07, Sri Lanka +srinath@wso2.com +Abstract. Modern day systems are facing an avalanche of data, and they are being forced to handle more and more data intensive use cases. These data come in many forms and shapes: sensors (RFID, Near Field Communication, weather sensors), transaction logs, the Web, social networks, etc. As an example, weather sensors across the world generate a large amount of data throughout the year. Handling these and similar data requires scalable, efficient, reliable and very large storage with support for efficient metadata based searching. This paper presents Mahasen, a highly scalable storage for high volume data intensive applications built on top of a peer-to-peer layer. In addition to scalable storage, Mahasen also supports efficient searching, built on top of a Distributed Hash Table (DHT). +1 Introduction +Currently, the United States collects weather data from many sources such as Doppler readers deployed across the country, aircraft, mobile towers, balloons, etc. These sensors keep generating a sizable amount of data. Processing them efficiently as needed is pushing our understanding of large-scale data processing to its limits. +Among the many challenges data poses, a prominent one is storing the data and indexing it so that scientists and researchers can ask for a specific type of data collected at a given time and in a given region. For example, a scientist may want to search for all Automated Weather data items collected in the Bloomington area on June 15 between 8am and 12pm. +Although we have presented meteorology as an example, there are many similar use cases. For instance, Sky Server [1] is one of the best examples that illustrate the use case of large data generation. This project expects to collect 40 terabytes of data in five years. In its data collection, the photometric catalog is expected to contain about 500 distinct attributes for each of one hundred million galaxies, one hundred million stars, and one million quasars. Similarly, many sciences, analytic processing organizations, data mining use cases, etc., would want to store large amounts of data and process them later in a selective manner. These systems often store data as files +and there have been several efforts to build large scale metadata catalogs [2][3] and storage solutions [4][5] to support storing and searching those data items. One such example is the AMGA metadata catalog [6], which was an effort to build a replication and distribution mechanism for metadata catalogs. +As we discuss in the related work section, most of the metadata catalog implementations use centralized architectures and therefore have limited scalability, unlike Mahasen. For example, Nirvana Storage [7] has a centralized metadata catalog which only supports scalability through vendor mechanisms such as Oracle Real Application Clusters.
XML Metadata Concept Catalog (XMC Cat) [8] is another centralized metadata catalog, which stores hierarchical rich metadata. This paper presents Mahasen, a scalable metadata catalog and storage server built on top of P2P technology. Further, it is built by distributing an open source centralized data registry (WSO2 Registry). +Mahasen (Distributed Storage Resource Broker) is a Data Grid Management System (DGMS) that can manage a large volume of distributed data. It targets high volume data intensive applications. The architecture of Mahasen has been designed to present a single global logical namespace across all the stored data, and it maintains a metadata structure which can be used to search files based on their attributes. It is a network of storage servers that plays the dual purpose of a metadata catalog and a storage server. Mahasen will solve the huge data storage problem and fault tolerance in data intensive computing by aggregating low cost hardware while having both metadata and actual resources distributed without a single point of failure. Metadata management will ensure the capability of searching files based on attributes of the stored resources. Mahasen has a metadata catalog which is highly distributed and scalable. The metadata layer ensures fault tolerance by keeping replicas of metadata. +The rest of the paper is organized as follows. The next section discusses the related work in metadata catalogs and storage servers while comparing and contrasting them with Mahasen. The following section discusses the Mahasen architecture. The next section presents the performance evaluation of Mahasen. Finally, the discussion section discusses limitations, other potential solutions and directions. +2 Related Work +2.1 Nirvana Storage +Nirvana SRB [7] is a middleware system that federates large heterogeneous data resources distributed across a network. The ability to access, manage, search and organize data across the entire SRB Federation is provided via a Global Namespace. MCAT is the centralized metadata repository which maintains two types of records – system- and user-metadata. Scalability of MCAT is achieved using database vendors' mechanisms [9], hence it is limited by relational DB scalability limits. +Storage/Replication. The stored resources are divided into Physical resources, Logical resources and Cluster resources. Replication of resources across multiple servers ensures the availability and recoverability of resources during failovers. +Retrieve. Data stream routing is handled by SRB and TCP/IP, making the data transfer process transparent to the users. +Search. Searching is done based on metadata attributes which are extracted and managed by the SRB. +Add/Update. Data can be added in two ways: Registration and Ingestion. Registration does not transfer any data but only creates a pointer to the data in MCAT. Ingestion is similar to registration but also transfers the data to an SRB storage resource. +Delete. If a file shadow object is used as a data object to ingest a file resource into SRB, then the file will be removed from MCAT but not from the physical location. +2.2 Apache OODT +OODT [10] is a middleware system for metadata that provides transparent access to the resources. It facilitates functionalities such as storing, retrieving, searching and analyzing distributed data, objects and databases jointly. OODT provides a product service and a profile service, which manage data and metadata respectively. +Storage/Replication.
OODT stores data products in file-based storage in a distributed manner. It classifies storage into three categories: on-line, near-line or off-line storage. +Retrieve. When OODT receives a request for retrieving a file, it issues a profile query to a product server that helps in resolving resources that could provide the data. The response will include the target product server address in the form of a URI. OODT then issues a product query based on the profile query results to get the data, and it will actually retrieve the data from the product server in a MIME-compliant format. +Search. OODT uses the profile server and the product server for searching the metadata and retrieving the products, and it has multiple servers of each type. OODT is based on a client-server architecture and promotes a REST-style architectural pattern for searching and retrieving data. The profile or a subset of the profile is returned for retrieval. +Add/Update. OODT provides data management, including managing files and folders, through the implementation of the javax.sql.DataSource interface. +Delete. The file management component of the Catalog and Archive Service supports the deletion of resource files and metadata through the implementation of the javax.sql.DataSource interface. +2.3 WSO2 Governance Registry +WSO2 Governance Registry [11] is a repository that allows users to store resources in a tree-structured manner, just like with a file system. However, unlike a file system, users may annotate resources with custom properties, and WSO2 Registry also has built-in metadata management features like tagging and associating resources. +However, the WSO2 registry is backed by a relational database system, and it uses database features to store data and metadata, to manage them, and to search them. Hence it has a centralized architecture. Mahasen extends that architecture to a distributed architecture. +Replication. There is no inbuilt mechanism for replicating resources in the WSO2 registry. +Search. The WSO2 registry provides two types of searches. One is searching for a resource by its name, metadata, etc., and it is implemented using the underlying relational database system. The second one is searching the content of resources, implemented using Lucene [12]. The second search is only applicable to resources with textual content. +Add/Update. Resources can be added to the registry in two ways. The first is adding via the web interface provided by the registry. When adding a new resource, it is also possible to add additional metadata such as tags and properties (name-value pairs), which will later be useful for searching for that resource. The other way to add resources is to extend the registry API and expose it as a web service. +The major limitation of the registry when storing resources is the amount of memory available. Since it uses the Java heap memory to buffer resources before storing them, large files cannot be stored, as the available memory is limited to a few hundred megabytes. +2.4 Hadoop Distributed File System +The Apache Hadoop Distributed File System (HDFS) [13] is a file system designed to run on commodity hardware. HDFS has a master-slave architecture that consists of a single NameNode as master and a number of DataNodes. The NameNode is responsible for regulating access to files by clients and managing the namespace of the file system. Generally DataNodes are deployed one per node in the cluster, and each is responsible for managing the storage attached to its node. +Storage / Replication.
Hadoop supports hierarchical file organization where users can create directories and store files. It splits each file into chunks with a default size of 64MB and stores them as a sequence of blocks, and those blocks are stored in the underlying file system of the DataNodes. The blocks are replicated for fault tolerance, and the block size and the replication factor of the data are configurable. +Retrieve. Applications that run on HDFS need streaming access to their data sets. DataNodes are responsible for the read requests issued by a user to retrieve data from the system. +Search. The Hadoop Distributed File System does not provide a comprehensive search for users or applications; it just fulfills the requirement of a distributed file system by supporting locating the physical location of a file using the system-specific metadata. +Add/Update. Writing to HDFS is done by creating a new file and writing data to it. Hadoop addresses a single-writer, multiple-readers model. Once the data is written and the file is closed, one cannot remove or alter the data. Data can be added to the file by reopening the file and appending new data. +Delete. When a file is deleted by a user or from an application, the particular resource is not immediately removed from HDFS. The resource is renamed and copied into the /trash directory, giving the possibility to restore it as long as it remains in the trash. +Mahasen's main differentiation from the above systems comes from its scalability. It can scale significantly better than Nirvana Storage, which depends on relational databases to scale the system, since the Mahasen metadata layer is natively distributed using a DHT. WSO2 Registry provides clustering as the scalability option, but it is not optimized for large file transfers and storage, as it uses ATOM-based resource transfers. Furthermore, Mahasen provides users a comprehensive metadata model for managing the distributed resources they store with user-defined metadata, unlike HDFS, which only focuses on creating a distributed file system. Further, Mahasen's metadata layer is natively distributed and fault tolerant, while HDFS has a single NameNode which can be made fault tolerant only with an active-passive failover configuration. +3 High Level Architecture +3.1 Mahasen High Level Architecture +As shown by Figure 1, Mahasen consists of several storage nodes which are connected as peers to a logical ring via FreePastry. Each node consists of a registry to store +metadata and a file system to store physical file parts. Once connected to the ring, each node contributes to the metadata space as well as file storage capacity, scaling the system dynamically with new node additions. Nodes use the underlying DHT (FreePastry) routing protocol to communicate efficiently with each other. +Fig. 1. Mahasen High Level Architecture +Mahasen uses the WSO2 registry and the file system in each node, and a DHT-based architecture is used to connect the nodes into one unit. +Mahasen has a distributed metadata layer that stores data about the distributed files in the Mahasen peer-to-peer network. The metadata catalog is used to broker the stored resources in the network and to assist the user in locating files in the Mahasen distributed environment, abstracting the metadata management from the user. +Mahasen stores two main types of metadata: system-defined metadata and user-defined (descriptive) metadata. System-defined metadata is mainly used for server-side resource handling.
File name, file size and the stored node IPs of a file are examples of system-defined metadata. User-defined metadata is used to provide users with searching capability over that metadata. Users can add tags and properties (name-value pairs) to the files that are uploaded. +Fig. 2. Metadata Object Structure of Mahasen +When a file is uploaded by connecting to a Mahasen node, the file will be temporarily saved on that node. The node will then act as the master node, split the file into chunks of a pre-defined size, and store the split parts in a selected set of neighborhood nodes of the master node through parallel transfer. The metadata object created by the master node will then be stored with replicas using the PAST storage implementation of FreePastry. We have rewritten the PAST node's persistent storage such that the data will be stored in the WSO2 registry on that node. +After storing the metadata, the nodes that received file parts act as worker nodes and replicate their file parts in parallel according to the replicate request issued by the master node. Each worker node will update the metadata object with the stored locations of the file parts it replicated. This requires concurrent access to the metadata objects, which Mahasen handles using the locking system provided by the lock manager of the DHT. +A user can request to download a file from any Mahasen node, and the node will first generate the resource ID for the requested file and retrieve the metadata object. Then it extracts from the metadata object the locations of the Mahasen nodes that contain the file parts and retrieves those parts to the local machine. After all the parts are retrieved, they will be merged to recreate the original file, and the file will be streamed to the user. +Deletion can be performed with a single command across a heterogeneous storage system. When a delete request for a file is issued, by following the same method as retrieving the file, Mahasen finds the nodes that store parts of the file and deletes them. Finally, the metadata object will also be deleted along with its replicas. +When a user needs to update the user-defined metadata, the node that receives the update request retrieves the metadata object for the file from the DHT, updates it, and stores it back in the DHT. +Using this model, Mahasen has built a completely decentralized metadata system that handles metadata management in a highly scalable and efficient manner. +Mahasen keeps replicas of both actual files and metadata objects. The main purpose of keeping replicas is fault tolerance and failover recovery. We ensure the high availability of metadata while ensuring scalability using FreePastry's underlying DHT. +3.2 Mahasen Search +As the amount of data in the system grows, the complexity of search increases. Mahasen builds a distributed data structure using the underlying DHT, which can improve the performance of the different search options that Mahasen supports. +The resources in Mahasen are associated with metadata, and for each tag or property in the system we maintain an index pointing to all resources which have that tag or property. This is implemented as a TreeMap [16], and the property trees are stored in the DHT, which handles their replicas. +Fig. 3. A Property Tree Stored in Mahasen Memory Storage +When a user sends a search request, Mahasen extracts the requested search and initiates the execution of the relevant search method.
Then the resource IDs of the files which match the given input are retrieved from the relevant property tree. Extracting the relevant resource IDs is done as follows. +Users can send search requests to any Mahasen node, and when a node receives a search request, Mahasen takes the property name given by the client and generates the property tree ID for that property. If the current node has the index for the property, it retrieves the matching resource IDs for that property and sends them to the client. If not, the node acts as a master node, gets the node handles of the nodes which hold the specific property tree, and routes Mahasen search messages with the required parameters to those node handles. Those node handles will then get the relevant resource IDs from the property trees in their memory storage and send them back to the master node. +The property values in the property tree are sorted, so if the search is a range-based search, we can simply take the sub-map between the initial and final property values and retrieve the set of resource IDs mapped to each node in the sub-tree. Since these resource IDs represent the files having the given property values, Mahasen can look up the metadata objects with those resource IDs and extract the file names to present to the user. The operation of extracting the file names for the resource IDs has a higher cost than extracting the matching resource IDs for the given search query. +The complete data structure built for Mahasen can support property-based search, range-based search, tag-based search and Boolean operations over the properties, such as AND and OR. The advanced search provided by Mahasen is capable of searching based on a set of different properties and tags. +Mahasen Search utilizes the continuation model supported by FreePastry for retrieving and transferring results. When a search request is issued, the application sends requests to look up the node handles which contain the particular TreeMap object and requests results from them. The application will then collect the first incoming result and resume from the previous execution point. +3.3 File Handling +File Transfer. Mahasen is a network of storage nodes, and users are given a client, the Mahasen Client, to access and transfer files to the network. The Mahasen Client, which is built using the Apache HttpClient [17], uses HTTP methods for transferring files to the network. First the client initiates a connection with one of the nodes in the network. An authenticated client is capable of uploading, downloading, deleting, updating or searching for files in the network. The file content will be added as an entity to an HTTP POST request and streamed to the target address. The receiving end will read the file stream and write it to the repository. +Replica Management. To achieve fault tolerance and failover recovery, the file will be split into a set of predefined chunks, and each part will be replicated and stored on different nodes according to a predefined replication factor. The placement of replicas is a critical aspect which affects the reliability and performance of the system. The purpose of having a policy for the placement of replicas is data reliability, availability, and network bandwidth utilization. The current policy of Mahasen is to store the replicated files in the leaf set of the initial node.
+3.3 File Handling
+File Transfer. Mahasen is a network of storage nodes, and users are given a client, the Mahasen Client, to access the network and transfer files to it. The Mahasen Client, built using the Apache HttpClient [17], uses HTTP methods for transferring files to the network. First the client initiates a connection with one of the nodes in the network. An authenticated client can upload, download, delete, update, or search for files in the network. The file content is added as an entity to the HTTP POST method and streamed to the target address. The receiving end reads the file stream and writes it to the repository.
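+As a sketch of that transfer path, the snippet below streams a file to a node with Apache HttpClient 4.x, following the POST-with-file-entity pattern described above. The endpoint URL is a made-up placeholder, and authentication is omitted:
+
+import java.io.File;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.ContentType;
+import org.apache.http.entity.FileEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+
+public class UploadSketch {
+    public static void main(String[] args) throws Exception {
+        try (CloseableHttpClient client = HttpClients.createDefault()) {
+            // Hypothetical upload endpoint on a Mahasen storage node
+            HttpPost post = new HttpPost("http://node1.example.org:9443/mahasen/upload");
+            // The file content is added as the request entity and streamed to the node
+            post.setEntity(new FileEntity(new File("/tmp/data.bin"),
+                    ContentType.APPLICATION_OCTET_STREAM));
+            HttpResponse response = client.execute(post);
+            System.out.println(response.getStatusLine());
+        }
+    }
+}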
+Replica Management. To achieve fault tolerance and failover recovery, the file is split into a set of predefined chunks, and each part is replicated and stored on different nodes according to a predefined replication factor. The placement of replicas is a critical part that affects the reliability and performance of the system. The purpose of having a replica placement policy is data reliability, availability, and network bandwidth utilization. The current policy of Mahasen is to store the replicated files in the leaf set of the initial node. The nodes in the leaf set are selected using a cost evaluation function that focuses on the distance to the node.
+After successfully transferring the file to the initial node, the client is notified about the status of the file transfer, and the initial node then replicates and transfers the file to the other nodes. The number of copies kept for a file is called the replication factor of that file, and it is decided by the Mahasen system.
+File Splitting and Parallel Transfer. The Mahasen storage network is designed to store large files reliably across distributed nodes. When a file is stored, it is split into blocks of fixed size, and these blocks are replicated across the network for fault tolerance. The replicated file blocks are transferred to the other nodes in parallel, in order to utilize the bandwidth and to save time.
+For the retrieval of a file, the system uses the metadata object to select the node closest to the reader node and downloads the blocks to the client. The file blocks are also downloaded in parallel, and the blocks are then merged to recreate the complete file.
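+A minimal sketch of the fixed-size splitting step (the block size and part naming here are arbitrary choices; in Mahasen the chunk size is a predefined system setting):
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class FileSplitter {
+    /** Splits a file into fixed-size blocks; returns the block file names. */
+    public static List<String> split(String path, int blockSize) throws IOException {
+        List<String> blocks = new ArrayList<>();
+        try (FileInputStream in = new FileInputStream(path)) {
+            byte[] buf;
+            int i = 0;
+            // readNBytes (Java 11+) fills up to blockSize bytes per block
+            while ((buf = in.readNBytes(blockSize)).length > 0) {
+                String name = path + ".part" + (i++);
+                try (FileOutputStream out = new FileOutputStream(name)) {
+                    out.write(buf);
+                }
+                blocks.add(name);
+            }
+        }
+        return blocks;
+    }
+}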
+3.4 Mahasen API
+Mahasen provides a complete API to perform CRUD operations and search. Users can develop external clients apart from the default client Mahasen provides, and integrate them with existing systems to perform resource management and search operations.
+4 Performance Analysis
+The scalability of the Mahasen system was tested by running a system with M nodes and N parallel clients, where M was 1, 6, 12, 18, or 24 and N was 1, 5, 10, 15, or 20. Each client carried out upload, download, delete, and search operations 10 times, and the average was taken. The system configuration used in this test was: two machines with Intel(R) Xeon(R) CPU E5-2403 1.80GHz (4 cores) and 24GB RAM, and one machine with Intel(R) Xeon(R) CPU E5-2470 2.30GHz (8 cores) and 63GB RAM. Figures 4 to 7 depict the results of this test. In the upload test, 500MB files were used by each client.
+Fig. 4. Upload test results
+The results show that as the number of clients increases, the upload time also increases. We believe this is due to network congestion and the background processes replicating data across nodes. When the number of nodes was increased to 18 or 24, a reduction in upload time was observed. This was expected behaviour, because the node that the client selects for upload distributes the replica management task to the other nodes in the p2p ring.
+Fig. 5. Download test results
+When downloading files using the Mahasen client, we observed that as the number of clients increases, the single-node setup shows significant growth in download time. In the performance test, a single node was chosen to receive the client requests while coordinating the file transfer from the other nodes in the setup. With multiple nodes in the system, file parts can be downloaded from the other available nodes, which reduces the download time.
+Fig. 6. Delete test results
+When Mahasen performs a delete on a resource, three operations are involved: deleting the metadata, deleting entries from the search index, and deleting the physical file. When more nodes are in the system, each node can delete its own files in parallel, making the system more scalable and efficient.
+Fig. 7. Search test results
+The search results illustrate that Mahasen performs well even as more nodes are added to the system. Usually the single-node setup should have the lowest possible time, as it does not have to search across the p2p ring, whereas with multiple nodes the results have to be aggregated and presented to the client. The figure also shows that when more clients are in the system, results tend to converge to a lower value due to caching, since we issued the search operations through the same node.
+5 Discussion and Future Work
+Mahasen provides a highly scalable metadata structure through the peer-to-peer architecture of its metadata catalog. Unlike existing metadata catalogs that use a centralized architecture, Mahasen distributes metadata across the nodes in the system with replication, making the overall system scalable and fault tolerant.
+Mahasen also keeps replicas of both metadata objects and property trees. The DHT of FreePastry is used to store these objects in the system and provides easy access to them. Keeping replicas of metadata objects and property tree objects does not cost as much as keeping replicas of the actual files, which are very large compared to metadata and property tree objects. By keeping these objects replicated in the system, Mahasen has been able to ensure the correct functioning of many of its operations even under conditions like node failures.
+An important contribution of Mahasen is the development of a distributed indexing structure on top of the DHT for searching data products using the different properties associated with them. Since Mahasen needed to support range-based queries, we evaluated earlier efforts to build such index structures. Skip Tree Graph [18] was one of the best candidates we considered for a search-assisting data structure, as it can efficiently support range-based queries over a DHT. However, since we had different properties, the data structure had to grow in two dimensions: the number of properties, and the number of entries per property. That would have forced us to create a separate DHT for each property, so we needed a much less complex solution, as maintaining different DHTs would have been very expensive in terms of resources.
+When the system scales up to a large number of nodes, issuing a search operation over the available raw metadata becomes more costly. Therefore Mahasen developed a combined data structure with the DHT and TreeMap, as explained earlier.
+When a Mahasen node fails and the failure is detected by the existing nodes in the network, Mahasen replicates all the metadata objects and property tree objects that were on the failed node to existing Mahasen nodes, reading them from other replicas. Mahasen preserves the availability of metadata objects and property tree objects by keeping their replication factor constant.
+The current Mahasen design has several limitations, which we plan to address in future work. Currently Mahasen stores each property index on one Mahasen node and assumes that it will fit within the memory of that node. This may not be a major concern for simple cases, and even NoSQL storages like Cassandra make similar assumptions. Dividing a property tree into parts and storing them on different nodes when it grows beyond a given size can solve this problem; we can predefine the maximum size of a part that will reside on one node.
+Another challenge is that a search based on multiple properties, where at least one is a common property, would force Mahasen to join large data sets; one potential solution is to negotiate the sizes of the data sets before starting the merge.
+To summarize, the Mahasen project builds a scalable storage solution by making a group of existing open source registries work as one unit. It provides one logical global namespace, and users may talk to any node of the group and perform any operation.
+Mahasen connects the nodes (registries) using PAST, a storage overlay implemented on top of the Pastry DHT algorithm. Furthermore, Mahasen builds a distributed indexing structure on top of the DHT to support property-based search of data items.
+A user can benefit from the provided Web Service API and use it effectively for batch file-upload processing, through either a custom client or the basic client provided by Mahasen.
+References
+1. Szalay, A.S., Kunszt, P.Z., Thakar, A., Gray, J., Slutz, D., Brunner, R.J.: Designing and Mining Multi-Terabyte Astronomy Archives: The Sloan Digital Sky Survey. In: SIGMOD '00: Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data (2000)
+2. Baru, C., Moore, R., Rajasekar, A., Wan, M.: The SDSC Storage Resource Broker (1998)
+3. Moore, R.W.: Managing Large Distributed Data Sets using the Storage Resource Broker (2010)
+4. DeCandia, G., Hastorun, D., Jampani, M., et al.: Dynamo: Amazon's Highly Available Key-value Store (2010)
+5. Ghemawat, S., Gobioff, H., Leung, S.-T.: The Google File System.
+6. Santos, N., Koblitz, B.: Distributed Metadata with the AMGA Metadata Catalog.
+7. Nirvana Storage - Home of the Storage Resource Broker (SRB®), http://www.nirvanastorage.com/index.php?module=htmlpages&func=display&pid=1 (2011)
+8. XML Metadata Concept Catalog (XMC Cat), Data to Insight Center, Indiana University Pervasive Technology Institute, http://d2i.indiana.edu/xmccat
+9. Nirvana Performance, http://www.nirvanastorage.com/index.php?module=htmlpages&func=display&pid=54
+10. Apache(TM) OODT, http://oodt.apache.org/ (2011)
+11. WSO2 Governance Registry, http://wso2.com/products/governance-registry/ (2011)
+12. Apache Lucene - Overview, http://lucene.apache.org/java/docs/index.html
+13. HDFS Architecture Guide, http://hadoop.apache.org/docs/r1.0.4/hdfs_design.html (2011)
+14. Pastry - A scalable, decentralized, self-organizing and fault-tolerant substrate for peer-to-peer applications, http://www.freepastry.org/
+15. Druschel, P., Rowstron, A.: PAST: A large-scale, persistent peer-to-peer storage utility. In: HotOS VIII, Schloss Elmau, Germany (2001)
+16. TreeMap (Java 2 Platform SE 5.0), http://download.oracle.com/javase/1.5.0/docs/api/java/util/TreeMap.html (2011)
+17. HttpClient - HttpComponents HttpClient Overview, http://hc.apache.org/httpcomponents-client-ga/ (2011)
+18. González Beltrán, A., Sage, P., Milligan, P.: Skip Tree Graph: a Distributed and Balanced Search Tree for Peer-to-Peer Networks.
\ No newline at end of file
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker.txt
new file mode 100644
index 0000000000000000000000000000000000000000..67416259f0fde4ffff53c54c973b52b030fce5e7
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Mahasen Distributed Storage Resource Broker.txt
@@ -0,0 +1,141 @@
+HAL Id: hal-01513774
+https://hal.inria.fr/hal-01513774
+Submitted on 25 Apr 2017
+HAL is a multi-disciplinary open access archive for the deposit and dissemination of scientific research documents, whether they are published or not. The documents may come from teaching and research institutions in France or abroad, or from public or private research centers.
+Distributed under a Creative Commons Attribution 4.0 International License
+Mahasen: Distributed Storage Resource Broker
+K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, Srinath Perera
+To cite this version:
+K. Perera, T. Kishanthan, H. Perera, D. Madola, Malaka Walpola, et al.: Mahasen: Distributed Storage Resource Broker. 10th International Conference on Network and Parallel Computing (NPC), Sep 2013, Guiyang, China. pp. 380-392, 10.1007/978-3-642-40820-5_32. hal-01513774
+Mahasen: Distributed Storage Resource Broker
+K.D.A.K.S. Perera1, T. Kishanthan1, H.A.S. Perera1, D.T.H.V. Madola1, Malaka Walpola1, Srinath Perera2
+1 Computer Science and Engineering Department, University of Moratuwa, Sri Lanka. {shelanrc, kshanth2101, ashansa.perera, hirunimadola, malaka.uom}@gmail.com
+2 WSO2 Lanka, No 59, Flower Road, Colombo 07, Sri Lanka
+srinath@wso2.com
+Abstract. Modern day systems are facing an avalanche of data, and they are being forced to handle more and more data intensive use cases. These data come in many forms and shapes: sensors (RFID, Near Field Communication, weather sensors), transaction logs, the Web, social networks, etc. As an example, weather sensors across the world generate a large amount of data throughout the year. Handling these and similar data requires scalable, efficient, reliable and very large storage with support for efficient metadata based searching. This paper presents Mahasen, a highly scalable storage for high volume data intensive applications built on top of a peer-to-peer layer. In addition to scalable storage, Mahasen also supports efficient searching, built on top of a Distributed Hash Table (DHT).
+1 Introduction
+Currently the United States collects weather data from many sources, such as Doppler radars deployed across the country, aircraft, mobile towers, balloons, etc. These sensors keep generating a sizable amount of data. Processing them efficiently as needed is pushing our understanding of large-scale data processing to its limits.
+Among the many challenges data poses, a prominent one is storing the data and indexing it so that scientists and researchers can ask for a specific type of data collected at a given time and in a given region. For example, a scientist may want to search for all automated weather data items collected in the Bloomington area on June 15 between 8am and 12pm.
+Although we have presented meteorology as an example, there are many similar use cases.
For instance, SkyServer [1] is one of the best examples illustrating the use case of large data generation. This project expects to collect 40 terabytes of data in five years. In its data collection, the photometric catalog is expected to contain about 500 distinct attributes for each of one hundred million galaxies, one hundred million stars, and one million quasars. Similarly, many sciences, analytic processing organizations, data mining use cases, etc. want to store large amounts of data and process them later in a selective manner. These systems often store data as files,
+and there have been several efforts to build large scale metadata catalogs [2][3] and storage solutions [4][5] to support storing and searching those data items. One such example is the AMGA metadata catalog [6], which was an effort to build replication and distribution mechanisms for metadata catalogs.
+As we discuss in the related work section, most metadata catalog implementations use centralized architectures and therefore have limited scalability, unlike Mahasen. For example, Nirvana Storage [7] has a centralized metadata catalog which only supports scalability through vendor mechanisms such as Oracle Real Application Clusters. The XML Metadata Concept Catalog (XMC Cat) [8] is another centralized metadata catalog, which stores hierarchical rich metadata. This paper presents Mahasen, a scalable metadata catalog and storage server built on top of P2P technology. Further, it is built by distributing an open source centralized data registry (WSO2 Registry).
+Mahasen (Distributed Storage Resource Broker) is a Data Grid Management System (DGMS) that can manage a large volume of distributed data. It targets high volume data intensive applications. The architecture of Mahasen has been designed to present a single global logical namespace across all the stored data, and it maintains a metadata structure which can be used to search files based on their attributes. It is a network of storage servers that serves the dual purpose of a metadata catalog and a storage server. Mahasen solves the huge data storage problem and provides fault tolerance in data intensive computing by aggregating low cost hardware, while keeping both metadata and actual resources distributed without a single point of failure. Metadata management ensures the capability of searching files based on the attributes of the stored resources. Mahasen has a metadata catalog which is highly distributed and scalable, and the metadata layer ensures fault tolerance by keeping replicas of the metadata.
+The rest of the paper is organized as follows. The next section discusses related work on metadata catalogs and storage servers, comparing and contrasting them with Mahasen. The following section discusses the Mahasen architecture. The next section presents the performance evaluation of Mahasen. Finally, the discussion section covers limitations, other potential solutions, and future directions.
+2 Related Work
+2.1 Nirvana Storage
+Nirvana SRB [7] is a middleware system that federates large heterogeneous data resources distributed across a network. The ability to access, manage, search and organize data across the entire SRB Federation is provided via a Global Namespace. MCAT is the centralized metadata repository, which maintains two types of records - system metadata and user metadata. Scalability of MCAT is achieved using database vendors' mechanisms [9], and is hence limited by relational database scalability limits.
+Storage/Replication.
The stored resources are divided into Physical resources, Logical resources, and Cluster resources. Replication of resources across multiple servers ensures the availability and recoverability of resources during failovers.
+Retrieve. Data stream routing is handled by SRB and TCP/IP, making the data transfer process transparent to the users.
+Search. Searching is done based on metadata attributes, which are extracted and managed by the SRB.
+Add/Update. Data can be added in two ways: registration and ingestion. Registration does not transfer any data but only creates a pointer to the data in MCAT. Ingestion is similar to registration but also transfers the data to an SRB storage resource.
+Delete. If a file shadow object is used as a data object to ingest a file resource into SRB, then the file will be removed from MCAT but not from its physical location.
+2.2 Apache OODT
+OODT [10] is a middleware system for metadata that provides transparent access to resources. It facilitates functionality to store, retrieve, search and analyze distributed data, objects and databases jointly. OODT provides a product service and a profile service, which manage data and metadata respectively.
+Storage/Replication. OODT stores data products in file-based storage in a distributed manner. It classifies storage into three categories: on-line, near-line and off-line storage.
+Retrieve. When OODT receives a request for retrieving a file, it issues a profile query to a product server, which helps in resolving the resources that could provide the data. The response includes the target product server address in the form of a URI. OODT then issues a product query based on the profile query results and actually retrieves the data from the product server in a MIME-compliant format.
+Search. OODT uses the profile servers for searching the metadata and the product servers for retrieving the products, and it can have multiple servers of each type. OODT is based on a client-server architecture, and it promotes a REST-style architectural pattern for searching and retrieving data. The profile, or a subset of the profile, is returned for retrieval.
+Add/Update. OODT provides data management, including managing files and folders, through an implementation of the javax.sql.DataSource interface.
+Delete. The file management component of the Catalog and Archive Service supports deleting resource files and metadata through the implementation of the javax.sql.DataSource interface.
+2.3 WSO2 Governance Registry
+WSO2 Governance Registry [11] is a repository that allows users to store resources in a tree-structured manner, just like a file system. However, unlike a file system, users may annotate resources with custom properties, and the WSO2 Registry also has built-in metadata management features such as tagging and associating resources.
+However, the WSO2 registry is backed by a relational database system; it uses database features to store data and metadata, to manage them, and to search. Hence it has a centralized architecture. Mahasen extends that architecture to a distributed one.
+Replication. There is no built-in mechanism for replicating resources in the WSO2 registry.
+Search. The WSO2 registry provides two types of search. The first searches for a resource by its name, metadata, etc., and is implemented using the underlying relational database system. The second searches the content of resources and is implemented using Lucene [12]; it is only applicable to resources with textual content.
+Add/Update.
Adding resources to the registry can be done in two ways. The first is via the web interface provided by the registry. When adding a new resource, it is also possible to add additional metadata such as tags and properties (name-value pairs), which will later be useful when searching for that resource. The other way is to write your own client by extending the registry API and exposing it as a web service.
+The major limitation of the registry when storing resources is the amount of memory available. Since it uses the Java heap memory to buffer resources before storing them, large files cannot be stored, as the available memory is limited to a few hundred megabytes.
+2.4 Hadoop Distributed File System
+The Apache Hadoop Distributed File System (HDFS) [13] is a file system designed to run on commodity hardware. HDFS has a master-slave architecture consisting of a single NameNode as the master and a number of DataNodes. The NameNode is responsible for regulating client access to files and for managing the namespace of the file system. DataNodes are generally deployed one per node in the cluster and are responsible for managing the storage attached to that node.
+Storage/Replication. Hadoop supports a hierarchical file organization where users can create directories and store files. It splits each file into chunks with a default size of 64MB and stores them as a sequence of blocks in the underlying file system of the DataNodes. Those blocks are replicated for fault tolerance, and both the block size and the replication factor are configurable.
+Retrieve. Applications that run on HDFS need streaming access to their data sets. DataNodes serve the read requests issued by users to retrieve data from the system.
+Search. HDFS does not provide a comprehensive search for users or applications; it just fulfills the requirement of a distributed file system by supporting the lookup of the physical location of a file using system-specific metadata.
+Add/Update. Writing to HDFS is done by creating a new file and writing data to it. Hadoop follows a single-writer, multiple-reader model. Once the data is written and the file is closed, the written data cannot be removed or altered; data can be appended by reopening the file.
+Delete. When a file is deleted by a user or an application, the resource is not immediately removed from HDFS. It is renamed and copied into the /trash directory, giving the possibility to restore it for as long as it remains in the trash.
+Mahasen's main differentiation from the above systems comes from its scalability. It can scale significantly better than Nirvana Storage, which depends on relational databases to scale the system, since the Mahasen metadata layer is natively distributed using a DHT. The WSO2 Registry provides clustering as its scalability option, but it is not optimized for transferring and storing large files, as it uses ATOM-based resource transfer. Furthermore, Mahasen provides users with a comprehensive metadata model for managing the distributed resources they store, with user-defined metadata, unlike HDFS, which only focuses on creating a distributed file system. Further, Mahasen's metadata layer is natively distributed and fault tolerant, while HDFS has a single NameNode, which can be made fault tolerant only with an active-passive failover configuration.
+3 High Level Architecture
+3.1 Mahasen High Level Architecture
+As shown in Figure 1, Mahasen consists of several storage nodes which are connected as peers to a logical ring via FreePastry. Each node consists of a registry, to store metadata, and a file system, to store physical file parts. Once connected to the ring, each node contributes to the metadata space as well as to the file storage capacity, scaling the system dynamically with each new node addition. Nodes use the underlying DHT (FreePastry) routing protocol to communicate efficiently with each other.
+Fig. 1. Mahasen High Level Architecture
+Mahasen uses the WSO2 registry and the file system on each node, and the DHT-based architecture is used to connect the nodes into one unit.
+Mahasen has a distributed metadata layer that stores data about the distributed files in the Mahasen peer-to-peer network. The metadata catalog is used to broker the stored resources in the network and to assist the user in locating files in the Mahasen distributed environment, abstracting the metadata management away from the user.
+Mahasen stores two main types of metadata: system-defined metadata and user-defined (descriptive) metadata. System-defined metadata is mainly used for server-side resource handling, while user-defined metadata supports user search over tags and properties.
\ No newline at end of file
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-relation.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d54f7ddc463251b78178bfc3b81e55ec2ccf5216
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-relation.txt
@@ -0,0 +1,299 @@
+OODT Filemgr User Guide
+Created by Thomas Bennett, last modified by Nadeeshan Gimhana on May 17, 2019
+The File Manager
+An Overview of What is Installed
+Configuring and Running the File Manager
+What's going to happen?
+Now for some configuration
+What have we configured?
+How is metadata collected?
+A brief overview of filemgr-client and query-tool
+Command: filemgr-client
+Command: query-tool
+A Typical User Scenario
+A few more tools
+Tips and Tricks for FileManager
+The File Manager
+This self-guided tutorial is intended for first-time users.
+
+Since you've found this page, I assume that you are seriously thinking of using the OODT File Manager and are eager to get something up and running. Hopefully it also means that you've checked out the code and built a cas-filemgr install target (e.g. a cas-filemgr-${version}-dist.tar.gz file).
+
+This tutorial is by no means a complete overview of all the File Manager's functionality. However, it's an attempt to get you started using the basic tools. Like learning to drive a car, the most difficult part is getting it started and on the road!
+
+The following topics are covered on this page:
+
+An Overview of What is Installed
+Configuring and Running the File Manager
+A Typical User Scenario - ingesting and querying
+An Overview of What is Installed
+Assumption - you have built or have access to a cas-filemgr install target. This also means that you've correctly configured maven and java for your system.
+
+Here are the commands to install the cas-filemgr target from a tarfile. You will need to fill in the "..." with the appropriate content.
+
+$ mkdir -p /usr/local/oodt/
+$ tar xzvf .../filemgr/target/cas-filemgr-${version}-dist.tar.gz -C /usr/local/oodt/
+$ cd /usr/local/oodt/
+$ ln -s cas-filemgr-${version}/ cas-filemgr
+The decompressed tar file creates a directory structure that looks as follows:
+
+.
+├── bin
+│   ├── filemgr
+│   ├── filemgr-client
+│   └── query-tool
+├── etc
+│   ├── filemgr.properties
+│   └── mime-types.xml
+├── lib
+│   └── *.jar
+├── logs
+├── policy
+│   ├── cmd-line-actions.xml
+│   ├── cmd-line-options.xml
+│   ├── core
+│   │   ├── elements.xml
+│   │   ├── product-type-element-map.xml
+│   │   └── product-types.xml
+│   ├── trace
+│   │   ├── elements.xml
+│   │   ├── product-type-element-map.xml
+│   │   └── product-types.xml
+│   ├── geo
+│   │   ├── elements.xml
+│   │   ├── product-type-element-map.xml
+│   │   └── product-types.xml
+│   └── (additional policy sub directories)
+└── run
+Please note, if you are using version 0.3 of OODT or earlier, the policy directory will look like this (with no sub directories):
+
+└── policy
+    ├── elements.xml
+    ├── product-type-element-map.xml
+    └── product-types.xml
+Here is a brief description of each directory listed above:
+
+bin : contains shell convenience scripts for launching java classes
+etc : contains configuration files, i.e. *.property and *.xml files
+lib : contains java resources, i.e. *.jar files
+logs : contains file manager log files
+policy : contains product specifications, i.e. *.xml specification files
+The bin directory contains a number of executables:
+
+filemgr : file manager (startup/shutdown) script
+filemgr-client : file manager client interface script
+query-tool : catalog query tool
+Configuring and Running the File Manager
+You're now ready to run the file manager!
+
+$ cd /usr/local/oodt/cas-filemgr/bin
+$ ./filemgr --help
+Usage: ./filemgr {start|stop|status}
+$ ./filemgr start
+What's going to happen?
+The filemgr should now be up and running; however, some WARNING messages may appear, complaining about configuration.
+
+If you get a java.net.BindException, make sure that no other service is running on port 9000. This is the port for an RPC interface that will be used for transferring data files into a repository.
+
+There's also a new file in the /usr/local/oodt/run directory. The file contains the filemgr process id. This is typical *nix service housekeeping; it is done to avoid running multiple filemgr services.
+
+There's also a new log file, /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log. Tailing this file can often alert you to problems.
+
+$ tail -f /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log
+
+Now for some configuration
+To do anything useful with your filemgr, you will need to specify some configuration in the /usr/local/oodt/cas-filemgr/etc/filemgr.properties file.
+
+Here is a basic modification to the filemgr.properties file:
+
+filemgr.properties
+org.apache.oodt.cas.filemgr.catalog.lucene.idxPath=/usr/local/oodt/cas-filemgr/catalog
+org.apache.oodt.cas.filemgr.repositorymgr.dirs=file:///usr/local/oodt/cas-filemgr/policy/core
+org.apache.oodt.cas.filemgr.validation.dirs=file:///usr/local/oodt/cas-filemgr/policy/core
+org.apache.oodt.cas.filemgr.mime.type.repository=/usr/local/oodt/cas-filemgr/etc/mime-types.xml
+You will also need to specify a repository path in the product-types.xml file. Make sure that this path exists before you change the repository path xml element.
+
+product-types.xml
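+As a rough stand-in for that element (the path is an example, and the layout follows OODT 0.x policy files - verify against the product-types.xml in your own policy directory):
+
+<type id="urn:oodt:GenericFile" name="GenericFile">
+    <repository path="file:///var/archive/data"/>
+    ...
+</type>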
+Your mime-types configuration file for file recognition. +How metadata is collected? +Now for some brief notes about how metadata is collected. The filemgr captures metadata in two different ways - from client side metadata extraction and server side metadata extraction. + +Client side metadata is passed to the filemgr via an xml formatted metadata file. E.g. a file called blah.txt can have a metadata file called blah.txt.met. This met file can be created in many ways, even by hand! And thats exactly what we're going to do. + +Server side metadata is generated by using java classes and the extractors that will be used are configured in the product-types.xml file in the chosen policy directory. For this example configuration, you should have /usr/local/oodt/cas-filemgr/policy/oodt as the policy directory, unless you're running version 0.3 or earlier of OODT, in which case you should have /usr/local/oodt/cas-filemgr/policy as the policy directory. + +Now would be a good time to have a quick look at the product-types.xml file. It contains some critical information about what is going to happen when we ingest our first file into the repository. + +Specified in the product-types.xml file, there is a default product type called GenericFile. This is the product type that we are going to use for the first file for ingestion. + +For the GenericFile type find the key. It's specifying some metadata. We're defining the product type! + +For the GenericFile type find the key. It's specifying some extractors to use for server side metadata extraction, namely: CoreMetExtractor, MimeTypeExtractor, FinalFileLocationExtractor. For more details about metadata and extractors see Metadata Extractors. + +If you're feeling curious, check out the other xml files in the /usr/local/oodt/cas-filemgr/policy subdirectories to get a better feel for how we define product types and elements. For a discussion of best practices w.r.t File Manager Policy, the reader is referred to Everything you want to know about File Manager Policy + +A brief overview of filemgr-client and query-tool +These commands are found in /usr/local/oodt/cas-filemgr/bin. + +Command: filemgr-client +In order to trigger a file ingestion we're going to use the filemgr-client. This is by no means the most automated way to ingest data into an repository, however it's a really easy and intuitive way to trigger a file ingestion. The filemgr-client is a wrapper script, making it easier to invoke a java executable from the command line. + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr-client --help +filemgr-client --url --operation [ [params]] +operations: +--addProductType --typeName --typeDesc + --repository --versionClass +--ingestProduct --productName --productStructure + --productTypeName --metadataFile + [--clientTransfer --dataTransfer ] + --refs ... +--hasProduct --productName +--getProductTypeByName --productTypeName +--getNumProducts --productTypeName +--getFirstPage --productTypeName +--getNextPage --productTypeName --currentPageNum +--getPrevPage --productTypeName --currentPageNum +--getLastPage --productTypeName +--getCurrentTransfer +--getCurrentTransfers +--getProductPctTransferred --productId --productTypeName +--getFilePctTransferred --origRef +As you can see there's a number of different ways this command can be executed. + +The first command line argument is --url. This is the location of the filemgr xml-rpc data transfer interface. 
Looking at the filemgr logs (specifically cas_filemgr0.log), we see an INFO statement telling us that local data transfer is enabled on http://localhost:9000. This is the url that we need to specify.
+
+The second command line argument is --operation, and there are 13 different types of operations possible! For now we are going to use the --ingestProduct operation. From the help command you can see that the --ingestProduct operation requires some further command line arguments to be specified.
+
+However, before we take a look at the --operation --ingestProduct, I would first like to shed a bit more light on the query-tool command.
+
+Command: query-tool
+This is a very useful wrapper script for querying the content of your repository.
+
+$ cd /usr/local/oodt/cas-filemgr/bin
+$ ./query-tool
+Must specify a query and filemgr url!
+Usage: QueryTool [options]
+options:
+--url
+ Lucene like query options:
+ --lucene
+ -query
+ SQL like query options:
+ --sql
+ -query
+ -sortBy
+ -outputFormat
+We see that we need to set some command line arguments to get anything useful out of the query tool. Try the next command:
+
+$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile'
+
+This should throw an exception, telling us it failed to perform a query. This is because there is no catalog yet (and therefore the GenericFile information does not exist). In fact, if you have a look, there is no catalog directory:
+
+$ ls /usr/local/oodt/cas-filemgr/catalog
+ls: /usr/local/oodt/cas-filemgr/catalog: No such file or directory
+
+A Typical User Scenario
+Time to ingest a very, very simple file. If you have not already, restart your filemgr so that it re-reads filemgr.properties:
+$ cd /usr/local/oodt/cas-filemgr/bin
+$ ./filemgr restart
+
+For this simple ingestion we are not going to include any client side metadata; all the metadata collection will happen on the server side using the *Extractor extractors specified in the product-types.xml file.
+
+Create a text file and its metadata file for ingestion:
+$ echo 'hello' > /tmp/blah.txt
+$ touch /tmp/blah.txt.met
+
+Add the following xml to the /tmp/blah.txt.met file:
+
+blah.txt.met
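+Since all extraction in this walkthrough happens server side, an empty cas:metadata element is sufficient here (namespace as used by OODT's CAS metadata files - compare with any sample met file from your install):
+
+<cas:metadata xmlns:cas="http://oodt.jpl.nasa.gov/1.0/cas"/>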
Let's ingest the file! For --operation --ingestProduct we need to specify the following arguments:
+
+--productName : The name you want for your ingested product
+--productStructure : Flat file or directory (i.e. hierarchical). Yes... we can ingest whole directories as one product
+--productTypeName : A product type (as per product-types.xml)
+--metadataFile : The client side metadata file
+--refs : The product location
+There's also an optional argument --clientTransfer; however, we're going to leave this out and use the default local transfer.
+[--clientTransfer --dataTransfer ]
+
+Here is the complete command:
+$ ./filemgr-client --url http://localhost:9000 --operation --ingestProduct --productName blah.txt --productStructure Flat --productTypeName GenericFile --metadataFile file:///tmp/blah.txt.met --refs file:///tmp/blah.txt
+
+The output should look like:
+Sep 16, 2011 2:09:42 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient
+...
+...
+ingestProduct: Result: c2fbf4b9-e05c-11e0-9022-77a707615e7f
+
+You've just archived your first file (wink).
+
+To complete the process, let's see if we can retrieve the metadata. Run the query command again:
+$ cd /usr/local/oodt/cas-filemgr/bin
+$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile'
+
+The output should look like:
+Sep 16, 2011 2:21:54 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManager complexQuery
+INFO: Query returned 1 results
+/var/archive/data/blah.txt,GenericFile,blah.txt,blah.txt,2011-09-16T14:09:43.405+02:00,c2fbf4b9-e05c-11e0-9022-77a707615e7f,Flat,text/plain,text,plain
+
+Check to see if the file has appeared in the archive:
+$ ls /var/archive/data/blah.txt/
+blah.txt
+
+Query commands do not depend on the underlying catalog implementation. The --sql and --lucene options instead describe the filemgr query syntax.
+
+At the time of writing this tutorial, composing queries using query-tool is not entirely straightforward, but it is entirely usable. The formatting of these queries is critical; small deviations from the syntax can result in the query returning an unexpected value or throwing an exception.
+
+Some things to note about SQL queries:
+
+Use double quotes ("") when specifying the SQL syntax. The single quote ('') is used for string values in a WHERE clause, e.g. WHERE Filename='blah.txt'
+Count the number of - before each command line option. Some are -- and others are -.
+The order of the return values for a search is not guaranteed unless you specify the -outputFormat option.
+Here is a somewhat verbose example that uses all the SQL-like syntax that I am currently aware of (apologies for all the line breaks).
+
+$ cd /usr/local/oodt/cas-filemgr/bin
+$ ./query-tool --url http://localhost:9000 --sql \
+-query "SELECT CAS.ProductReceivedTime,CAS.ProductName,CAS.ProductId,ProductType,\
+ProductStructure,Filename,FileLocation,MimeType \
+FROM GenericFile WHERE Filename='blah.txt'" -sortBy 'CAS.ProductReceivedTime' \
+-outputFormat '$CAS.ProductReceivedTime,$CAS.ProductName,$CAS.ProductId,$ProductType,\
+$ProductStructure,$Filename,$FileLocation,$MimeType'
+The output should look like:
+2011-10-07T10:59:12.031+02:00,blah.txt,a00616c6-f0c2-11e0-baf4-65c684787732,
+GenericFile,Flat,blah.txt,/var/kat/archive/data/blah.txt,text/plain
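+The --lucene option takes standard Lucene query syntax over the element names defined in your policy, for example (field name assumed from the core policy's elements.xml):
+
+$ ./query-tool --url http://localhost:9000 --lucene -query 'Filename:blah.txt'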
+ +A few more tools +Cameron Goodale has written some useful command line tool aliases that are worth mentioning before we continue. See the following two web pages: https://issues.apache.org/jira/browse/OODT-306 +BASH and TCSH shell tools for File Manager + +Tips and Tricks for FileManager +Q: My Lucene Index Catalog is running slow now that I have over 100,000 products cataloged. How can I get the speed back? + +A: Run this command: +java -Djava.endorsed.dirs= org.apache.oodt.cas.filemgr.tools.OptimizeLuceneCatalog --catalogPath \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-simEnts.txt new file mode 100644 index 0000000000000000000000000000000000000000..d54f7ddc463251b78178bfc3b81e55ec2ccf5216 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide-simEnts.txt @@ -0,0 +1,299 @@ +OODT Filemgr User Guide +Created by Thomas Bennett, last modified by Nadeeshan Gimhana on May 17, 2019 +The File Manager +An Overview of What is Installed +Configuring and Running the File Manager +What's going to happen? +Now for some configuration +What have we configured? +How is metadata collected? +A brief overview of filemgr-client and query-tool +Command: filemgr-client +Command: query-tool +A Typical User Scenario +A few more tools +Tips and Tricks for FileManager +The File Manager +This self-guided tutorial is intended for first-time users. + +Given that you've found this page, I assume that you are seriously thinking of using the OODT File Manager and are eager to get something up and running. It hopefully also means that you've checked out the code and built a cas-filemgr install target (e.g. a cas-filemgr-${version}-dist.tar.gz file). + +This tutorial is by no means a complete overview of all the File Manager's functionality. However, it's an attempt to get you started using the basic tools. Like learning to drive a car, the most difficult part is getting it started and on the road! + +The following topics are covered on this page: + +An Overview of What is Installed +Configuring and Running the File Manager +A Typical User Scenario - ingesting and querying +An Overview of What is Installed +Assumption - you have built or have access to a cas-filemgr install target. This also means that you've correctly configured Maven and Java for your system. + +Here are the commands to install the cas-filemgr target from a tarfile. You will need to fill in the "..." with the appropriate content. + +$ mkdir -p /usr/local/oodt/ +$ tar xzvf .../filemgr/target/cas-filemgr-${version}-dist.tar.gz -C /usr/local/oodt/ +$ cd /usr/local/oodt/ +$ ln -s cas-filemgr-${version}/ cas-filemgr +The decompressed tar file creates a directory structure that looks as follows: + +. +├── bin +│ ├── filemgr +│ ├── filemgr-client +│ └── query-tool +├── etc +│ ├── filemgr.properties +│ └── mime-types.xml +├── lib +│ └── *.jar +├── logs +└── policy +| ├── cmd-line-actions.xml +| ├── cmd-line-options.xml +| ├── core +| │ ├── elements.xml +| │ ├── product-type-element-map.xml +| │ └── product-types.xml +| | +| ├── trace +| | ├── elements.xml +| | ├── product-type-element-map.xml +| | └── product-types.xml +| | +| ├── geo +| | ├── elements.xml +| | ├── product-type-element-map.xml +| | └── product-types.xml +| | +| (additional policy sub directories) +└── run +Please note, if you are using version 0.3 of OODT or earlier, the policy directory will look like this (with no sub directories): + +└── policy + ├── elements.xml + ├── product-type-element-map.xml + └── product-types.xml +Here is a brief description of each directory that you see listed: + +
bin : contains shell convenience scripts for launching Java classes +etc : contains configuration files, i.e. *.properties and *.xml files +lib : contains Java resources, i.e. *.jar files +logs : contains file manager log files. +policy : contains product specifications, i.e. *.xml specification files +The bin directory contains a number of executables: + +filemgr : file manager (startup/shutdown) script +filemgr-client : file manager client interface script +query-tool : catalog query tool +Configuring and Running the File Manager +You're now ready to run the file manager! + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr --help +Usage: ./filemgr {start|stop|status} +$ ./filemgr start +What's going to happen? +The filemgr should now be up and running; however, some WARNING messages may appear, complaining about configuration. + +If you get a java.net.BindException exception, make sure that no other service is running on port 9000. This is the port for an RPC interface that will be used for transferring data files into a repository. + +There's also a new file in the /usr/local/oodt/run directory. The file contains the filemgr process id. This is typical for *nix service housekeeping. It is done to try and avoid running multiple filemgr services. + +There's also a new log file /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log. Tailing this file can often alert you to problems. + +$ tail -f /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log + +Now for some configuration +To do anything useful with your filemgr, you will need to specify some configuration in the /usr/local/oodt/cas-filemgr/etc/filemgr.properties file. + +Here is a basic modification to the filemgr.properties file: + +filemgr.properties +org.apache.oodt.cas.filemgr.catalog.lucene.idxPath=/usr/local/oodt/cas-filemgr/catalog +org.apache.oodt.cas.filemgr.repositorymgr.dirs=file:///usr/local/oodt/cas-filemgr/policy/core +org.apache.oodt.cas.filemgr.validation.dirs=file:///usr/local/oodt/cas-filemgr/policy/core +org.apache.oodt.cas.filemgr.mime.type.repository=/usr/local/oodt/cas-filemgr/etc/mime-types.xml +You will also need to specify a repository path in the product-types.xml file. Make sure that this path exists before you change the repository path XML element (the element itself was lost in the page conversion; see the sketch at the end of this section). + +product-types.xml + +Restart your filemgr so that it re-reads the filemgr.properties and product-types.xml: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr restart + +What have we configured? +A place to store your catalog, i.e. the database of metadata. +A place to store your ingested files, i.e. the repository. +The location of your policy directory for product specifications. +Your mime-types configuration file for file recognition. +How is metadata collected? +Now for some brief notes about how metadata is collected. The filemgr captures metadata in two different ways: client side metadata extraction and server side metadata extraction. + +Client side metadata is passed to the filemgr via an XML formatted metadata file. E.g. a file called blah.txt can have a metadata file called blah.txt.met. This met file can be created in many ways, even by hand! And that's exactly what we're going to do. + +Server side metadata is generated by Java classes; the extractors that will be used are configured in the product-types.xml file in the chosen policy directory. For this example configuration, you should have /usr/local/oodt/cas-filemgr/policy/core as the policy directory, unless you're running version 0.3 or earlier of OODT, in which case you should have /usr/local/oodt/cas-filemgr/policy as the policy directory.
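+ +To make the discussion below concrete, here is a rough sketch of what the GenericFile entry in policy/core/product-types.xml looks like once a repository path has been set. Treat it as an illustration rather than a verbatim copy of the shipped file: the file:///var/archive path is an assumption carried over from the query output later in this guide, the element and attribute names follow the CAS product type policy convention, and the extractor configuration blocks are elided: + +<cas:producttypes xmlns:cas="http://oodt.jpl.nasa.gov/1.0/cas"> + <type id="urn:oodt:GenericFile" name="GenericFile"> + <!-- the repository path must point at an existing directory --> + <repository path="file:///var/archive"/> + <versioner class="org.apache.oodt.cas.filemgr.versioning.BasicVersioner"/> + <description>The default product type for any kind of file.</description> + <metExtractors> + <extractor class="org.apache.oodt.cas.filemgr.metadata.extractors.CoreMetExtractor"> + <configuration>...</configuration> + </extractor> + </metExtractors> + </type> +</cas:producttypes>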
+ +Now would be a good time to have a quick look at the product-types.xml file. It contains some critical information about what is going to happen when we ingest our first file into the repository. + +Specified in the product-types.xml file, there is a default product type called GenericFile. This is the product type that we are going to use for the first file we ingest. + +For the GenericFile type, find the key. It's specifying some metadata. We're defining the product type! + +For the GenericFile type, find the key. It's specifying some extractors to use for server side metadata extraction, namely: CoreMetExtractor, MimeTypeExtractor, FinalFileLocationExtractor. For more details about metadata and extractors see Metadata Extractors. + +If you're feeling curious, check out the other XML files in the /usr/local/oodt/cas-filemgr/policy subdirectories to get a better feel for how we define product types and elements. For a discussion of best practices w.r.t. File Manager Policy, the reader is referred to Everything you want to know about File Manager Policy. + +A brief overview of filemgr-client and query-tool +These commands are found in /usr/local/oodt/cas-filemgr/bin. + +Command: filemgr-client +In order to trigger a file ingestion we're going to use the filemgr-client. This is by no means the most automated way to ingest data into a repository; however, it's a really easy and intuitive way to trigger a file ingestion. The filemgr-client is a wrapper script, making it easier to invoke a Java executable from the command line. + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr-client --help +filemgr-client --url --operation [ [params]] +operations: +--addProductType --typeName --typeDesc + --repository --versionClass +--ingestProduct --productName --productStructure + --productTypeName --metadataFile + [--clientTransfer --dataTransfer ] + --refs ... +--hasProduct --productName +--getProductTypeByName --productTypeName +--getNumProducts --productTypeName +--getFirstPage --productTypeName +--getNextPage --productTypeName --currentPageNum +--getPrevPage --productTypeName --currentPageNum +--getLastPage --productTypeName +--getCurrentTransfer +--getCurrentTransfers +--getProductPctTransferred --productId --productTypeName +--getFilePctTransferred --origRef +As you can see, there are a number of different ways this command can be executed. + +The first command line argument is --url. This is the location of the filemgr XML-RPC data transfer interface. Looking at the filemgr logs (specifically cas_filemgr0.log), we see an INFO statement telling us that local data transfer is enabled on http://localhost:9000. This is the URL that we need to specify. + +The second command line argument is --operation, and there are 13 different types of operations available! For now we are going to use the --ingestProduct operation. From the help command you can see that the --ingestProduct operation requires some further command line arguments to be specified. + +However, before we take a look at --operation --ingestProduct, I would first like to shed a bit more light on the query-tool command. + +Command: query-tool +This is a very useful wrapper script to query the content of your repository. + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool +Must specify a query and filemgr url!
+Usage: QueryTool [options] +options: +--url + Lucene like query options: + --lucene + -query + SQL like query options: + --sql + -query + -sortBy + -outputFormat +We see that we need to set some command line arguments to get anything useful out of the query tool. Try the next command: + +$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile' + +This should throw an exception, telling us it failed to perform a query. This is because there is no catalog yet (and therefore the GenericFile information does not exist). In fact, if you have a look, there is no catalog directory: + +$ ls /usr/local/oodt/cas-filemgr/catalog +ls: /usr/local/oodt/cas-filemgr/catalog: No such file or directory + +A Typical User Scenario +Time to ingest a very, very simple file. If you have not already, restart your filemgr so that it re-reads the filemgr.properties: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr restart + +For this simple ingestion we are not going to include any client side metadata; all the metadata collection will happen on the server side using the specified *Extractor extractors in the product-types.xml file. + +Create a text file and its metadata file for ingestion: +$ echo 'hello' > /tmp/blah.txt +$ touch /tmp/blah.txt.met + +Add the following XML to the /tmp/blah.txt.met file (see the blah.txt.met sketch shown earlier): + +blah.txt.met + + +Let's ingest the file! For --operation --ingestProduct we need to specify the following arguments: + +--productName : The name you want for your ingested product +--productStructure : Flat file or directory (i.e. hierarchical). Yes... we can ingest whole directories as one product +--productTypeName : A product type (as per product-types.xml) +--metadataFile : The client side metadata file +--refs : The product location +There's also an optional argument, --clientTransfer; however, we're going to leave this out and use the default local transfer. +[--clientTransfer --dataTransfer ] + +Here is the complete command: +$ ./filemgr-client --url http://localhost:9000 --operation --ingestProduct --productName blah.txt --productStructure Flat --productTypeName GenericFile --metadataFile file:///tmp/blah.txt.met --refs file:///tmp/blah.txt + +The output should look like: +Sep 16, 2011 2:09:42 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient +... +... +ingestProduct: Result: c2fbf4b9-e05c-11e0-9022-77a707615e7f + +You've just archived your first file (wink). + +To complete the process, let's see if we can retrieve the metadata. Run the query command again: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile' + +The output should look like: +Sep 16, 2011 2:21:54 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManager complexQuery +INFO: Query returned 1 results +/var/archive/data/blah.txt,GenericFile,blah.txt,blah.txt,2011-09-16T14:09:43.405+02:00,c2fbf4b9-e05c-11e0-9022-77a707615e7f,Flat,text/plain,text,plain + +Check to see if the file has appeared in the archive: +$ ls /var/archive/data/blah.txt/ +blah.txt + +Query commands do not depend on the underlying catalog implementation. The --sql and --lucene options instead describe the filemgr query syntax. + +At the time of writing this tutorial, composing queries using query-tool is not entirely straightforward, but it is entirely usable. Formatting of these queries is critical; small deviations from the syntax can result in the query returning an unexpected value or throwing an exception.
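+ +For contrast with the SQL form, the equivalent Lucene-like lookup should look roughly like the following sketch; the key:value form here follows standard Lucene query syntax rather than a listing from the original page, so check the usage output above if your build differs: + +$ ./query-tool --url http://localhost:9000 --lucene -query 'Filename:blah.txt'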
+ +Some things to note about SQL queries: + +Use double quotes ("") when specifying the SQL syntax. The single quote ('') is used for string values in a WHERE clause, e.g. WHERE Filename='blah.txt' +Count the number of dashes before each command line option. Some take -- and others take -. +The order of the return values for a search is not guaranteed unless you specify the -outputFormat option. +Here is a somewhat verbose example that uses all the SQL-like syntax that I am currently aware of (apologies for all the line breaks). + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool --url http://localhost:9000 --sql \ +-query "SELECT CAS.ProductReceivedTime,CAS.ProductName,CAS.ProductId,ProductType,\ +ProductStructure,Filename,FileLocation,MimeType \ +FROM GenericFile WHERE Filename='blah.txt'" -sortBy 'CAS.ProductReceivedTime' \ +-outputFormat '$CAS.ProductReceivedTime,$CAS.ProductName,$CAS.ProductId,$ProductType,\ +$ProductStructure,$Filename,$FileLocation,$MimeType' +The output should look like: +2011-10-07T10:59:12.031+02:00,blah.txt,a00616c6-f0c2-11e0-baf4-65c684787732, +GenericFile,Flat,blah.txt,/var/kat/archive/data/blah.txt,text/plain + +Now you can also check out some of the other 12 --operation possibilities for filemgr-client. For instance: + +$ ./filemgr-client --url http://localhost:9000 --operation --hasProduct --productName blah.txt + +Or: + +$ ./filemgr-client --url http://localhost:9000 --operation --getFirstPage --productTypeName GenericFile + +A few more tools +Cameron Goodale has written some useful command line tool aliases that are worth mentioning before we continue. See the following two web pages: https://issues.apache.org/jira/browse/OODT-306 +BASH and TCSH shell tools for File Manager + +Tips and Tricks for FileManager +Q: My Lucene Index Catalog is running slow now that I have over 100,000 products cataloged. How can I get the speed back? + +A: Run this command: +java -Djava.endorsed.dirs= org.apache.oodt.cas.filemgr.tools.OptimizeLuceneCatalog --catalogPath \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide.txt new file mode 100644 index 0000000000000000000000000000000000000000..d54f7ddc463251b78178bfc3b81e55ec2ccf5216 --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/OODT Filemgr User Guide.txt @@ -0,0 +1,299 @@ +OODT Filemgr User Guide +Created by Thomas Bennett, last modified by Nadeeshan Gimhana on May 17, 2019 +The File Manager +An Overview of What is Installed +Configuring and Running the File Manager +What's going to happen? +Now for some configuration +What have we configured? +How is metadata collected? +A brief overview of filemgr-client and query-tool +Command: filemgr-client +Command: query-tool +A Typical User Scenario +A few more tools +Tips and Tricks for FileManager +The File Manager +This self-guided tutorial is intended for first-time users. + +Given that you've found this page, I assume that you are seriously thinking of using the OODT File Manager and are eager to get something up and running. It hopefully also means that you've checked out the code and built a cas-filemgr install target (e.g. a cas-filemgr-${version}-dist.tar.gz file). + +This tutorial is by no means a complete overview of all the File Manager's functionality. However, it's an attempt to get you started using the basic tools.
Like learning to drive a car, the most difficult part is getting it started and on the road! + +The following topics are covered on this page: + +An Overview of What is Installed +Configuring and Running the File Manager +A Typical User Scenario - ingesting and querying +An Overview of What is Installed +Assumption - you have built or have access to a cas-filemgr install target. This also means that you've correctly configured Maven and Java for your system. + +Here are the commands to install the cas-filemgr target from a tarfile. You will need to fill in the "..." with the appropriate content. + +$ mkdir -p /usr/local/oodt/ +$ tar xzvf .../filemgr/target/cas-filemgr-${version}-dist.tar.gz -C /usr/local/oodt/ +$ cd /usr/local/oodt/ +$ ln -s cas-filemgr-${version}/ cas-filemgr +The decompressed tar file creates a directory structure that looks as follows: + +. +├── bin +│ ├── filemgr +│ ├── filemgr-client +│ └── query-tool +├── etc +│ ├── filemgr.properties +│ └── mime-types.xml +├── lib +│ └── *.jar +├── logs +└── policy +| ├── cmd-line-actions.xml +| ├── cmd-line-options.xml +| ├── core +| │ ├── elements.xml +| │ ├── product-type-element-map.xml +| │ └── product-types.xml +| | +| ├── trace +| | ├── elements.xml +| | ├── product-type-element-map.xml +| | └── product-types.xml +| | +| ├── geo +| | ├── elements.xml +| | ├── product-type-element-map.xml +| | └── product-types.xml +| | +| (additional policy sub directories) +└── run +Please note, if you are using version 0.3 of OODT or earlier, the policy directory will look like this (with no sub directories): + +└── policy + ├── elements.xml + ├── product-type-element-map.xml + └── product-types.xml +Here is a brief description of each directory that you see listed: + +bin : contains shell convenience scripts for launching Java classes +etc : contains configuration files, i.e. *.properties and *.xml files +lib : contains Java resources, i.e. *.jar files +logs : contains file manager log files. +policy : contains product specifications, i.e. *.xml specification files +The bin directory contains a number of executables: + +filemgr : file manager (startup/shutdown) script +filemgr-client : file manager client interface script +query-tool : catalog query tool +Configuring and Running the File Manager +You're now ready to run the file manager! + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr --help +Usage: ./filemgr {start|stop|status} +$ ./filemgr start +What's going to happen? +The filemgr should now be up and running; however, some WARNING messages may appear, complaining about configuration. + +If you get a java.net.BindException exception, make sure that no other service is running on port 9000. This is the port for an RPC interface that will be used for transferring data files into a repository. + +There's also a new file in the /usr/local/oodt/run directory. The file contains the filemgr process id. This is typical for *nix service housekeeping. It is done to try and avoid running multiple filemgr services. + +There's also a new log file /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log. Tailing this file can often alert you to problems. + +$ tail -f /usr/local/oodt/cas-filemgr/logs/cas_filemgr0.log + +Now for some configuration +To do anything useful with your filemgr, you will need to specify some configuration in the /usr/local/oodt/cas-filemgr/etc/filemgr.properties file.
+ +Here is a basic modification to the filemgr.properties file: + +filemgr.properties +org.apache.oodt.cas.filemgr.catalog.lucene.idxPath=/usr/local/oodt/cas-filemgr/catalog +org.apache.oodt.cas.filemgr.repositorymgr.dirs=file:///usr/local/oodt/cas-filemgr/policy/core +org.apache.oodt.cas.filemgr.validation.dirs=file:///usr/local/oodt/cas-filemgr/policy/core +org.apache.oodt.cas.filemgr.mime.type.repository=/usr/local/oodt/cas-filemgr/etc/mime-types.xml +You will also need to specify a repository path in the product-types.xml file. Make sure that this path exists before you change the repository path XML element (see the product-types.xml sketch shown earlier). + +product-types.xml + +Restart your filemgr so that it re-reads the filemgr.properties and product-types.xml: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr restart + +What have we configured? +A place to store your catalog, i.e. the database of metadata. +A place to store your ingested files, i.e. the repository. +The location of your policy directory for product specifications. +Your mime-types configuration file for file recognition. +How is metadata collected? +Now for some brief notes about how metadata is collected. The filemgr captures metadata in two different ways: client side metadata extraction and server side metadata extraction. + +Client side metadata is passed to the filemgr via an XML formatted metadata file. E.g. a file called blah.txt can have a metadata file called blah.txt.met. This met file can be created in many ways, even by hand! And that's exactly what we're going to do. + +Server side metadata is generated by Java classes; the extractors that will be used are configured in the product-types.xml file in the chosen policy directory. For this example configuration, you should have /usr/local/oodt/cas-filemgr/policy/core as the policy directory, unless you're running version 0.3 or earlier of OODT, in which case you should have /usr/local/oodt/cas-filemgr/policy as the policy directory. + +Now would be a good time to have a quick look at the product-types.xml file. It contains some critical information about what is going to happen when we ingest our first file into the repository. + +Specified in the product-types.xml file, there is a default product type called GenericFile. This is the product type that we are going to use for the first file we ingest. + +For the GenericFile type, find the key. It's specifying some metadata. We're defining the product type! + +For the GenericFile type, find the key. It's specifying some extractors to use for server side metadata extraction, namely: CoreMetExtractor, MimeTypeExtractor, FinalFileLocationExtractor. For more details about metadata and extractors see Metadata Extractors. + +If you're feeling curious, check out the other XML files in the /usr/local/oodt/cas-filemgr/policy subdirectories to get a better feel for how we define product types and elements. For a discussion of best practices w.r.t. File Manager Policy, the reader is referred to Everything you want to know about File Manager Policy. + +A brief overview of filemgr-client and query-tool +These commands are found in /usr/local/oodt/cas-filemgr/bin. + +Command: filemgr-client +In order to trigger a file ingestion we're going to use the filemgr-client. This is by no means the most automated way to ingest data into a repository; however, it's a really easy and intuitive way to trigger a file ingestion. The filemgr-client is a wrapper script, making it easier to invoke a Java executable from the command line.
+ +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr-client --help +filemgr-client --url --operation [ [params]] +operations: +--addProductType --typeName --typeDesc + --repository --versionClass +--ingestProduct --productName --productStructure + --productTypeName --metadataFile + [--clientTransfer --dataTransfer ] + --refs ... +--hasProduct --productName +--getProductTypeByName --productTypeName +--getNumProducts --productTypeName +--getFirstPage --productTypeName +--getNextPage --productTypeName --currentPageNum +--getPrevPage --productTypeName --currentPageNum +--getLastPage --productTypeName +--getCurrentTransfer +--getCurrentTransfers +--getProductPctTransferred --productId --productTypeName +--getFilePctTransferred --origRef +As you can see, there are a number of different ways this command can be executed. + +The first command line argument is --url. This is the location of the filemgr XML-RPC data transfer interface. Looking at the filemgr logs (specifically cas_filemgr0.log), we see an INFO statement telling us that local data transfer is enabled on http://localhost:9000. This is the URL that we need to specify. + +The second command line argument is --operation, and there are 13 different types of operations available! For now we are going to use the --ingestProduct operation. From the help command you can see that the --ingestProduct operation requires some further command line arguments to be specified. + +However, before we take a look at --operation --ingestProduct, I would first like to shed a bit more light on the query-tool command. + +Command: query-tool +This is a very useful wrapper script to query the content of your repository. + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool +Must specify a query and filemgr url! +Usage: QueryTool [options] +options: +--url + Lucene like query options: + --lucene + -query + SQL like query options: + --sql + -query + -sortBy + -outputFormat +We see that we need to set some command line arguments to get anything useful out of the query tool. Try the next command: + +$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile' + +This should throw an exception, telling us it failed to perform a query. This is because there is no catalog yet (and therefore the GenericFile information does not exist). In fact, if you have a look, there is no catalog directory: + +$ ls /usr/local/oodt/cas-filemgr/catalog +ls: /usr/local/oodt/cas-filemgr/catalog: No such file or directory + +A Typical User Scenario +Time to ingest a very, very simple file. If you have not already, restart your filemgr so that it re-reads the filemgr.properties: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./filemgr restart + +For this simple ingestion we are not going to include any client side metadata; all the metadata collection will happen on the server side using the specified *Extractor extractors in the product-types.xml file. + +Create a text file and its metadata file for ingestion: +$ echo 'hello' > /tmp/blah.txt +$ touch /tmp/blah.txt.met + +Add the following XML to the /tmp/blah.txt.met file (see the blah.txt.met sketch shown earlier): + +blah.txt.met + + +Let's ingest the file! For --operation --ingestProduct we need to specify the following arguments: + +--productName : The name you want for your ingested product +--productStructure : Flat file or directory (i.e. hierarchical). Yes...
we can ingest whole directories as one product +--productTypeName : A product type (as per product-types.xml) +--metadataFile : The client side metadata file +--refs : The product location +There's also an optional argument, --clientTransfer; however, we're going to leave this out and use the default local transfer. +[--clientTransfer --dataTransfer ] + +Here is the complete command: +$ ./filemgr-client --url http://localhost:9000 --operation --ingestProduct --productName blah.txt --productStructure Flat --productTypeName GenericFile --metadataFile file:///tmp/blah.txt.met --refs file:///tmp/blah.txt + +The output should look like: +Sep 16, 2011 2:09:42 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient +... +... +ingestProduct: Result: c2fbf4b9-e05c-11e0-9022-77a707615e7f + +You've just archived your first file (wink). + +To complete the process, let's see if we can retrieve the metadata. Run the query command again: +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool --url http://localhost:9000 --sql -query 'SELECT * FROM GenericFile' + +The output should look like: +Sep 16, 2011 2:21:54 PM org.apache.oodt.cas.filemgr.system.XmlRpcFileManager complexQuery +INFO: Query returned 1 results +/var/archive/data/blah.txt,GenericFile,blah.txt,blah.txt,2011-09-16T14:09:43.405+02:00,c2fbf4b9-e05c-11e0-9022-77a707615e7f,Flat,text/plain,text,plain + +Check to see if the file has appeared in the archive: +$ ls /var/archive/data/blah.txt/ +blah.txt + +Query commands do not depend on the underlying catalog implementation. The --sql and --lucene options instead describe the filemgr query syntax. + +At the time of writing this tutorial, composing queries using query-tool is not entirely straightforward, but it is entirely usable. Formatting of these queries is critical; small deviations from the syntax can result in the query returning an unexpected value or throwing an exception. + +Some things to note about SQL queries: + +Use double quotes ("") when specifying the SQL syntax. The single quote ('') is used for string values in a WHERE clause, e.g. WHERE Filename='blah.txt' +Count the number of dashes before each command line option. Some take -- and others take -. +The order of the return values for a search is not guaranteed unless you specify the -outputFormat option. +Here is a somewhat verbose example that uses all the SQL-like syntax that I am currently aware of (apologies for all the line breaks). + +$ cd /usr/local/oodt/cas-filemgr/bin +$ ./query-tool --url http://localhost:9000 --sql \ +-query "SELECT CAS.ProductReceivedTime,CAS.ProductName,CAS.ProductId,ProductType,\ +ProductStructure,Filename,FileLocation,MimeType \ +FROM GenericFile WHERE Filename='blah.txt'" -sortBy 'CAS.ProductReceivedTime' \ +-outputFormat '$CAS.ProductReceivedTime,$CAS.ProductName,$CAS.ProductId,$ProductType,\ +$ProductStructure,$Filename,$FileLocation,$MimeType' +The output should look like: +2011-10-07T10:59:12.031+02:00,blah.txt,a00616c6-f0c2-11e0-baf4-65c684787732, +GenericFile,Flat,blah.txt,/var/kat/archive/data/blah.txt,text/plain + +Now you can also check out some of the other 12 --operation possibilities for filemgr-client. For instance: + +$ ./filemgr-client --url http://localhost:9000 --operation --hasProduct --productName blah.txt + +Or: + +$ ./filemgr-client --url http://localhost:9000 --operation --getFirstPage --productTypeName GenericFile + +A few more tools +Cameron Goodale has written some useful command line tool aliases that are worth mentioning before we continue.
See the following two web pages: https://issues.apache.org/jira/browse/OODT-306 +BASH and TCSH shell tools for File Manager + +Tips and Tricks for FileManager +Q: My Lucene Index Catalog is running slow now that I have over 100,000 products cataloged. How can I get the speed back? + +A: Run this command: +java -Djava.endorsed.dirs= org.apache.oodt.cas.filemgr.tools.OptimizeLuceneCatalog --catalogPath \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c22f3309dfbbc720b09c3f5d59af9540e8c183b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-relation.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description +org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation +org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-simEnts.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c22f3309dfbbc720b09c3f5d59af9540e8c183b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action-simEnts.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description +org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation 
+org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c22f3309dfbbc720b09c3f5d59af9540e8c183b --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/Package org.apache.oodt.cas.filemgr.cli.action.txt @@ -0,0 +1,26 @@ +Catalog and Archive File Management Component 0.12 API +Packages +Package Description +org.apache.oodt.cas.filemgr.catalog +org.apache.oodt.cas.filemgr.catalog.solr +org.apache.oodt.cas.filemgr.cli.action +org.apache.oodt.cas.filemgr.datatransfer +org.apache.oodt.cas.filemgr.exceptions +org.apache.oodt.cas.filemgr.ingest +org.apache.oodt.cas.filemgr.metadata +org.apache.oodt.cas.filemgr.metadata.extractors +org.apache.oodt.cas.filemgr.metadata.extractors.examples +org.apache.oodt.cas.filemgr.repository +org.apache.oodt.cas.filemgr.structs +org.apache.oodt.cas.filemgr.structs.exceptions +org.apache.oodt.cas.filemgr.structs.query +org.apache.oodt.cas.filemgr.structs.query.conv +org.apache.oodt.cas.filemgr.structs.query.filter +org.apache.oodt.cas.filemgr.structs.type +org.apache.oodt.cas.filemgr.structs.type.examples +org.apache.oodt.cas.filemgr.system +org.apache.oodt.cas.filemgr.system.auth +org.apache.oodt.cas.filemgr.tools +org.apache.oodt.cas.filemgr.util +org.apache.oodt.cas.filemgr.validation +org.apache.oodt.cas.filemgr.versioning \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-relation.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-relation.txt new file mode 100644 index 0000000000000000000000000000000000000000..600b501136a12f398d7b5d1d04cd146a9e6d4d6e --- /dev/null +++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-relation.txt @@ -0,0 +1 @@ +react file manager All components included in this dashboard template have been developed to bring all the potential of HTML5 and Bootstrap plus a set of new features (JS and CSS) ideal for your next dashboard admin theme or admin web application project. It should retain extensibility, even for external projects. Description. Redux helps you write applications that behave consistently, run in different environments (client, server, and native), and are easy to test. It has a beautiful design, as you can see from the live previews and it contains a LOT of components and features. filemanager namespace exposes the FileManagerCommand class that could be extended to implement a custom File Manager command. Just display a list within 2 predefined tabs (folders). Create React App - TS docs; Next. The Edit screen with option to select one or more files is displayed. Add Start script to package. 9,676 4. Inbuilt Search textbox in FileManager: See Also. Module files are represented in the design manager in a multi-pane module editor. To include the File Manager component in application import the FileManagerComponent from ej2-react-filemanager package in App. 1 - 28 November 2020 ----- - Upgraded Bootstrap version to 4. Filemanager with React & Nodejs . prod. The download manager handles HTTP connections, monitors connectivity changes, reboots, and ensures each download completes successfully. I would like this shortcode to be dynamic, i. Grab the demo from Github if you haven't done this yet.
Mobile applications definitely offer a greater value to businesses than their mobile website In this tutorials we will use a package named @react-native-community/checkbox to add checkboxes in react native. Express your opinions freely and help others including your future self You can customize Storybook's webpack setup by providing a webpackFinal field in . 4. 3. import React from 'react'; import 'devextreme/dist/css/dx. js, Express and TypeScript. Install the React components and choose a theme that suits your needs. target. 2 - Ability to translate Wireframes and PSD Designs into functional web apps using HTML5, React , Node. In this tutorial you will learn how to create a working file upload component with react from scratch using no dependencies other than react itself. 2/6. /. You can rearrange the order of your files by dragging them around to move the important files to the top of the list for faster access. Run the Drupal Page having React Nested modals aren’t supported, but if you really need them the underlying react-overlays can support them if you're willing. Download the corresponding App Center SDK for iOS frameworks provided as a zip file and unzip it. NET Core suite along with 100+ fully-featured UI components designed to speed up delivery & improve every aspect of target. You’ll see a plus symbol to the left of the file or folder. com and its affiliated web properties is provided "as is" without warranty of any kind. An electron based file manager. The file manager application is like the heart of a smartphone. Modal's "trap" focus in them, ensuring the keyboard navigation cycles through the modal, and not the rest of the page. Async uploading with AJAX, or encode files as base64 data and send along form post. Overview of Kendo UI FileManager; Sort in Kendo UI FileManager; Toolbar Commands in Kendo UI FileManager Express your opinions freely and help others including your future self I am a beginner in react. LibraryManager. Developed with the latest jQuery plugins. html and . Complete file and folder manager: Create, rename, move and delete a folder. It's very important for me your collaboration on my development tasks and time. Test your JavaScript, CSS, HTML or CoffeeScript online with JSFiddle code editor. Go to react_code\src and change the apiUrl inside config. js as per your current url of Drupal. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8192ee_new. Say “EDIT MODE”. 1. To enable profiling in production mode, modify Webpack configuration file (config/webpack. To download and start utilizing Syncfusion's Essential Studio for React components, see our pricing model. A simple file manager built with react. azurewebsites. 0. Vue. Updated laravel 7 to all full version and starter-kit; React Fixed. mp4 videos to the server. Learn to build modern web applications using Angular, React & Vue! File Upload Component with Vue. A file input (dropzone) management component for React. TIP: If you have never seen a dot file (a file starting with a dot) it might be odd at first because that file might not appear in your file manager, as it’s a hidden file. Unlike vanilla Bootstrap, autoFocus works in Modals because React handles the implementation Free download Filedash – File Manager Dashboard Nulled. Maybe later i can have a button to have the grid view File-Manager 개발정의서 들어가며 본 문서는 인수인계 목적이 아닌 개발이 완료된 제품에 대한 이해를 돕기위해 제작된 개발 정의서입니다. Say for instance that you want to open the file select dialogue for a user to select an file to upload. 
This sample demonstrates how to utilize the Amazon S3 file system provider to manage the files in File Manager component. Deploy Trillo File Manager from GCP Marketplace In this tutorials we will use a package named @react-native-community/checkbox to add checkboxes in react native. Choose a device definition, Nexus 5X is suggestable. Create a new project with React Native. React also allows us to create reusable UI components. Use Git or checkout with SVN using the web URL. bs4 File Manager. v6. js . Source code: https://bit. Learn more . log(event. Also, you might want to customize the look of the file input in the form to make it resonate with your overall app design. Chocolatey is trusted by businesses to manage software deployments. Conclusion Let’s work that out. Scheduler. svg'; // replace it with your path // Profile upload helper const HandleImageUpload = => { // we are referencing the file input const imageRef = useRef(); // Specify the default image const [defaultUserImage React Filemanager. Download Nulled Filedash – File Manager Dashboard. 1 - Added new Scrollable layout. x, Columns: 4+. FTP Access Upload files via FTP Need easier and faster way to upload and download. The default locale of the file manager is en (English). In our editor / file manager we should see a . It is distributed through NPM under the kendo-react-upload package. It was initially called Filer but was changed to Thunar due to a name clash. It can be used as a standalone app or as a middleware. I will try to make it clean and retro-compatible with the previous bridges/connectors. When it comes to both of these issues, React can help you provide a better user experience. Themes and Skinning JavaScript - jQuery, Angular, React, Vue React Data Grid. Build files will be created build. Select, Copy, Paste, and Delete. Python dictionary add, delete, update, exists keys with performance; java. react-dropzone is a React’s implementation of popular drag and drop library for file uploading. And in our opinion, the Webix library offers the best solution available on the market. Free bootstrap snippets, examples and resources built with html, css and js. Adding React File Manager for PDF Library In the previous section, we added the File Server Node API component from filemanager (by -OpusCapita) . So follow the below setups:- 1) Install the @react-native-community/checkbox package like below in your application 2) Link the module in your application 3) Import Free React Design System For Bootstrap 4 (reactstrap) 9,824 4. You can add spans to any grid element, fine-tune the table sizes, specify the columns’ auto width, and freeze one or more columns. Folder based file browser given a flat keyed list of objects, powered by React. 10. Client React connector for Google Drive API v2; Detailed documentation for each package is coming soon. You can fire up the project with dotnet run to see what the scaffold does for you. /assets/images/defaultUser. It is fully responsive, built with Bootstrap 4 Framework, HTML5, CSS3 and SCSS. The editor is the whole CodeSandbox application (file manager, code editor, dependency settings) and the preview is the result you see on the right. Extension for Visual Studio Code - Simple extensions for React, Redux and Graphql in JS/TS with ES7 syntax Another file format that uses the MD file extension is Moneydance Financial Data. import React, { useEffect, useRef, useState } from 'react'; // Specify camera icon to replace button text import camera from '. 
- Added Blog List, Blog Grid, Blog Details pages. Site Navigation and Layout. React Native This is an exact mirror of the React Native project, A lightweight and easy-to-use password manager Clonezilla. All the operating systems got a file manager to filter the required files. Create React App – How to Create and Deploy a React Application to Production. and we can drill down into various modules. 5. 0 To do so, right-click the libman. By default, Storybook's webpack configuration will allow you to: Import Images and other static files Semantic UI React provides React components while Semantic UI provides themes as CSS stylesheets. mov, . (Ex – Facebook, Twitter and Google. Then add the File Manager component as shown in below code example. Web. View . This is one of the admin tools that our customers manage their static files on shared host. Initially, the selectedFilestate is set to null The FileBrowser dialogs consist of a FileBrowser object, an object previewer/property manager and a file uploader tab. Let’s install Bootstrap and React. ” - [source] You can delete the files from My Media, My Documents or My Photos folders. Managing your React. Site Navigation and Layout. This can be done in one of two ways: Run bower install --save for each package (the --save flag adds the dependencies (name and version) to the bower. fThe file is called : "FirstReactApp. bs4 File Manager. A partition and disk Adds React debugging tools to the Chrome Developer Tools. This is a Sample React Plugin for Apache OODT File Manager. The FileBrowser dialogs consist of a FileBrowser object, an object previewer/property manager and a file uploader tab. Thunar is designed to start up faster and be more responsive than some other Linux file managers, such as Nautilus and Konqueror. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8812ae_new. This is an example file with default selections. new file manager windows 10 Executive Summary These course materials were originally designed for Google managers to help them transition from an individual contributor role to a manager role. jsx". dll and React. filebrowser provides a file managing interface within a specified directory and it can be used to upload, delete, preview, rename and edit your files. Files. json must be written in JSON. I will try to make it clean and retro-compatible with the previous bridges/connectors It's very important for me your collaboration on my development tasks and time. They are not part of the template and NOT included in the final purchase files. So in the above imports, the files would be CartTotal. In XCode, in the project navigator, right click Libraries Add Files to [your project's name] Go to node_modules react-native-file-manager and add the . LibraryManager. You can go for either an HTML5 drag & drop file uploader or use the traditional way. json file is saved. I want somebody who can redo what has been done and finish it. On the file manager for a domain you have a ‘public_html’ folder. dll (if using MVC 4) in your Web Application project Your first build always needs to be done using the build script ( dev-build. The FileBrowser provides the ability to browse directories and locate a file item. Downloading the file. cs ). ) Wrap long file names in the File Manager’s detail view Customize icons in the Folder Tree. com, lands you with the opportunity of working with a leading technology organization. v 2. You Install react-file-reader (A flexible ReactJS component for handling styled HTML file inputs. 
files[0]) } On saving, create-react-app will instantly refresh the browser. babelrc configuration file. Looking for the best react native app development companies? Here is the list of the top React native developers with reviews by ADA. Unfortunately it can be quite intimidating. if I add/remove files in File Manager, it will react dynamically on the front-side (so I don’t need to modify the shortcode or put a React is a popular open-source JavaScript library – many of you asked for an easier integration between Power BI and React web applications. Free bootstrap snippets, examples and resources tagged with file-manager, html, css and js. Themes and Skinning JavaScript - jQuery, Angular, React, Vue React Data Grid. Please help me to move forward with a donation by paypal :) The file manager component is used to browse, manage, and organize the files and folders in a file system through a web application. js. js file to a . Build) to the project, which will trigger a restore as part of project build. json file and choose “Enable Restore on Build”. Any FOSS lover is warmly welcomed A lot of people name React components with a capital letter in the file, to distinguish them from regular JavaScript files. To delete one or more files, 1. When viewing a module locally, the files are contained within module-name. Input A file input management component for React. WP Media Folder v5. Upload React website to subdomain Open File Manager Create new folder inside “public_html” Upload whole content of “build” folder into this new created folder. The “React JS Developer” role at one. How can I create a custom command for the Kendo UI File Manager? Creating a Custom Command. Restore on demand Library Manager will restore client-side libraries whenever the libman. It allows the creation of multiple users and each user can have its own directory. In this tutorial we are going to create a task manager application from scratch with react. Once a file item is selected, it (or its properties) is loaded in the previewer. The DevExtreme JavaScript FileManager component allows you to display and manage files and directories for different file systems. To make the functions work as expected, I transpile these into CommonJS format in addition to transpiling React JSX files. 부디 도움이 되길 바랄 뿐입니다. It's a command-line utility connected with the corresponding online repository of packages and is capable of package installation, version management, and dependency management. Step 9: Configuring AVD Manager. This time with Trillo File Manager is an application for Dropbox-like functionality on the top of the GCS cloud storage bucket. However, you don’t want to use the standard file input HTML element, instead use a styled link or button to show the file window. 3 - Binding of UI elements to JavaScript object models. 2. React is one of the best choices for building modern web applications. js file. Default configuration. This is a Sample React Plugin for Apache OODT File Manager. React has a slim API, a robust and evolving ecosystem and a great community. If nothing happens, download Xcode and try again. com and its affiliated web properties is provided "as is" without warranty of any kind. After downloading the installation file of it, double click on it and proceed with the installation. The KendoReact Upload component is part of the KendoReact library of React UI components. common. Say “EDIT MODE”. json React Component by Creating. You can delete the files from My Media, My Documents or My Photos folders. 
The new React component supports both JavaScript and TypeScript and will help you embed your analytics in a React web application. Our file caching system will have two main parts. Tailor fman to your needs with its powerful plugin system. React can handle a single button, a few pieces of an interface, or an app's entire user interface. js. Requirements Creating a File Upload Component with React. react-native-azure-blob-storage-manager. npm install react-files --save Usage Basic I don't think there is one, but it's such a strange question, React is used on the web as a frontend library, while a file manager runs on your local computer. Then use the Axios library to send the file request to the Laravel server and saves the image in the server. ReactOS is a free and open-source operating system for x86/x64 personal computers intended to be binary-compatible with computer programs and device drivers made for Windows Server 2003. Component { render() { return ( ); } } export default App; File Manager can be initialized using the tag. npm start To create a new build inside dist directory. Disclaimer: The information provided on DevExpress. Download Manager is a system service which optimizes the handling of long-running downloads in the background. The first is a React component, which will wrap around RNFetchBlob’s functionality and respond to changes in the Redux store. This method of deleting corrupted files requires you to close "Windows Explorer" through "Task Manager". json. A list of its key features is given below. The MD file stores transactions, budgets, stock information, bank accounts, and other related data for the Moneydance finance software. The second is a set of actions and reducers on the Redux store which deal specifically with file caching. apiOptions, apiRoot: `http://opuscapita-filemanager-demo-master. Integrate TinyMCE editor in Laravel with a File Manager / Image Upload Jquery PHP July 27, 2019 2,006 views Create Sortable, drag and drop multi-level list with jquery like wordpress menu page All Webix widgets and applications function well in the Angular or React environment. Today we will create File Manager App UI with animation using React Native. Sweet Alert in dark layout; Design Files Removed. Installation. Free bootstrap snippets, examples and resources tagged with file-manager, html, css and js. . dll. When a TypeScript script gets compiled there is an option to generate a declaration file (with the extension . You have high end protection; It also has a file manager that is easy to access. With React, you can create reusable components that are independent of each other. Use the Download button in the toolbar. The following table represents the default texts and messages of the file manager in en culture. This project based course will introduce you to all of the modern toolchain of a React developer in 2020. For example, users can upload images, videos, etc on Facebook, Instagram. Source + Demo. Data List; React DataTable Component Vue based front-end for File Manager Aug 01, 2018 1 min read. To associate your repository with the react-electron topic, visit your repo's landing page and select "manage topics. . 6. Benefits of Hosting React. This will add the LibraryManager NuGet package (Microsoft. Add events to precisely control file/folder operations (folder creation, file uploading, moving, deleting, etc. 6. Go through the following steps for creating React project to download file from server using React. 
A full list of the compatible frameworks and integration examples you can find on this page . Files and folders in the file system can be sorted in either ascending or descending order simply by npm install --save @opuscapita/react-filemanager @opuscapita/react-filemanager-connector-node-v1. If nothing happens, download GitHub Desktop and try again. import ReactFileReader from 'react-file-reader'; class Because the other files & folders above (some truncated) are usually part of a default react-native init installation, our focus would be on the src folder:. tsx. 5, npm 6. Drag & Drop your files in folders: Drag & Drop and image to move it into a folder, where you can find other files. To configure the AVD Manager click on the respective icon in the menu bar. These first have been selected by most active users and ranking has been given based on the most popular votes. Accessible , tested with AT software like VoiceOver and JAWS, navigable by Keyboard . changing a . target. Install from NPM and include it in your own React build process (using Browserify, Webpack, etc). The project is about uploading a users products/services. The name npm (Node Package Manager) stems from when npm first was created as a package manager for Node. If multiple packages depend on a package - jQuery for example - Bower will download jQuery just once. React. View . Communicating react with asp. holyidiot updated Vuexy - Vuejs, React, HTML & Laravel Admin Dashboard Template with a new update entry: Update [6. 0] – 2020-11-28 Latest Update [6. ts) that functions as an interface to the components in the compiled JavaScript. Added 2021-01-09 file-manager,file-browser spofly Desktop app to find lyrics of currently playing song on spotify. You have two options for creating a file uploader. ReactOS will only be compatible with computers that are compatible with Windows 2003 or XP. ej2-react-filemanager. Basic usage. Any FOSS lover is warmly welcomed React Native ; Bootstrap file-manager examples. Bower provides hooks to facilitate using packages in your tools and workflows. jsx" Select Web => JSX File, and enter file name "FirstReactApp. The FileManager provides an inbuilt Search functionality, allowing you to find the specific file in the currently selected folder. React File Manager: A Lightweight & Customizable Component File upload and download. File Browser Front-end. Paper Kit ReactOS is a free, opensource reimplementation of windows Related: How to Copy and Paste Text, Files and Folders in Linux Terminal. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. Removed complementary design files from the package [5. Uploading Files using HTML5 Uploader. TagSpaces features basic file management operations, so it can be used as simple file manager. To associate your repository with the react-electron topic, visit your repo's landing page and select "manage topics. dhtmlxGrid contains rich API functionality. This is an unparalleled array of features, design elements and reusable components Introduction to Asus file manager. To do so, right-click the libman. /assets/images/camera. Try All UI Components for Free All UI components for React are part of the dhtmlxSuite library. Changing the Webpack config. Add multiple URL to pocket at a time. Create better React apps faster and add data visualizations with the world's fastest, virtualized, real-time React data grid and streaming financial and business charts. 
File uploading means a user from a client machine wants to upload files to the server. net core on remote server (httpdocs folder) does not work RSS 0 replies Last post 2 hours, 59 minutes ago by fiazahmed An electron based file manager. 10 – WordPress File Manager Using the default WordPress media manager also means that the plugin will be very compatible with all the other plugins you use. It’s not a visual file manager, but it gives a set of functions to easily handle media/files in your Laravel app. html’ file to run on start up. Once a file item is selected, it (or its properties) is loaded in the previewer. Read the full article at: http://bgwebagency. The presence of these handlers enables the buttons and/or the drag & drop responsiveness. (eg. However, newer versions of the program use . The FileManager uses file system providers to access file systems. fastlane/ This folder, as you might React doesn’t have opinions on how you put files into folders. files[0]) } On saving, create-react-app will instantly refresh the browser. No action needed. pdf file to use a . Firebase issue during npm install [5. The standard tool for this task is Babel. The create-react-app utility configures tools such as Babel and webpack for the client-side React application. Build) to the project, which will trigger a restore as part of project build. In this section, we are going to add the Client React component from OpusCapita for navigating the folders and listing the files in our PDF library. nyc_output and coverage folder containing our instrumentation detail. . In terms of frontend frameworks, this React admin dashboard is powered by Material-UI, which is the most popular material-based UI components framework available today. - Added File Manager Page. MONEYDANCE files instead. Blog Post. Page 16. lang. A file input (dropzone) management component for React. Videos: Manage member level settings & Videos created by Users. storybook/main. ) Channels: Manage member level settings & Channel created by Users. It enables the user to perform common file operations such as accessing, editing, uploading, downloading, and sorting files and folders. To upload a file with React and Laravel, create a React file component and backend in Laravel. 2. React Chart. The following sample is extracted from Android 4 ICS emulator image. Similarly, every smartphone has a file manager to view, edit, and create any text files, delete, sort, or rename, copy, and cut whenever required. All npm packages are defined in files called package. ). The official front-end framework for building experiences that fit seamlessly into Microsoft 365. Create shortcuts for files: Hold SHIFT and move a file with drag & drop to another folder in order to create a shortcut Bootstrap snippets. 1. js file. To select a specific file, you need to use the number assigned to it. See bundler defaults for the full list. Bower is optimized for the front-end. 15. React. This allows teams to set conventions that work best for them, and to adopt React in any way they would like to. It also supports uploading a file by dragging it from Windows Explorer to FileManager control. Cezerin is open-source ecommerce platform. Tailor your React grid component according to your needs. dll GitHub - networknt/react-file-manager: A react remote file manager with Light Framework as back end file system. Social Sites Integration: With one click, you can login to your site using Social Sites. 
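The Axios-to-Laravel upload flow described above (send the file request to the Laravel server, which saves the image) can be sketched as follows. The /api/upload route and the 'image' field name are assumptions; the real names depend on the Laravel backend.

import axios from 'axios';

async function uploadImage(file: File): Promise<void> {
  const data = new FormData();
  data.append('image', file); // hypothetical field name expected by the backend
  await axios.post('/api/upload', data, {
    headers: { 'Content-Type': 'multipart/form-data' },
  });
}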
Using the arrow keys, move over the desired file or folder and press Space on the keyboard. 9 all file uploads, including those initiated by the File Browser plugin, expect a JSON response (like this one ). In addition to building skills, this curriculum incorporates introspection, perspective shifting, and awareness building. These two parts are very decoupled and only communicate using postMessage. Plugin has two parts: Front-end & Back-end. Note that your changes would be temporary and will not persist between re-runs of your package manager. Other Feature Module in React template: Voice and Video Call File Manager Contacts and Email Departments and Designations Timesheet and Overtime Kanban Board Payroll, Payslip and Payrun Company Policies Performance, Goal Tracking, Training and Promotion Resignation and Termination Faq and Knowledgebase Profile Settings, Profile and Edit Profile 🎈 React Material-UI. August 08, 2018. Store the file in state, and only upload when a user clicks the upload button. Overview. This package help you to upload file and assests from react native project to your azure blob storage service. If you want to download large files/streaming you can use Android Download Manager. React components can be installed via yarn or npm: After install, import the minified CSS file in your app's entry file: File manager built with the help of Suite components: Layout, Grid, DataView, Toolbar, etc. js doesn’t have to be hard and with these few steps, you can do it yourself. js'; class App extends React. Use npm to install the Expo CLI command line utility from the Windows Command Prompt, PowerShell, Windows Terminal, or the integrated terminal in VS Code (View > Integrated Terminal). Clear the cache from admin panel. js , or Total. How you use packages is up to you. html file should reveal our code coverage in a human readable and hopefully revealing way. We will cd into react project react-file-upload – cd react-file-upload Now will install dependencies – npm install bootstrap npm install react-toastify npm install axios The bootstrap help to create ui based on bootstrap 4, react-toastify is use to display beautiful notification into react app and axios for HTTP client. Save time by quickly jumping to directories. CKEditor 4 can be easily integrated with an external file manager (file browser/uploader) thanks to the File Browser plugin which by default is included in the Standard and Full presets. A very smart filemanager to manage your files in the browser developed in AngularJS following Material Design styles by Jonas Sciangula Street. These files will always be rendered/loaded to the page when an instance of the module is on the page (Module instances are the individual rendered modules on the page). FileManager Conclusion To make a file downloadable from your website, start by creating a folder on your server for both your website's HTML page and the file you want to share. Select the file in the manager. It is often used for developing Web Applications or Mobile Apps. mp3, . Edit: But As a web app. 2. You can add a custom thumbnail and text description to every file or folder. That said there are a few common approaches popular in the ecosystem you may want to consider. changing a . Drag and Drop Support in React FileManager component 23 Feb 2021 / 1 minute to read The file manager allows files or folders to be moved from one folder to another by using the allowDragAndDrop property. 
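As a rough sketch of the allowDragAndDrop property mentioned above on the Syncfusion React File Manager: the component is configured declaratively and drag and drop is a single boolean flag. The ajaxSettings URL is a placeholder for the backend file-operations service; consult the Syncfusion documentation for the exact contract.

import * as React from 'react';
import { FileManagerComponent } from '@syncfusion/ej2-react-filemanager';

export default function App() {
  return (
    <FileManagerComponent
      ajaxSettings={{ url: 'https://example.com/api/FileOperations' }} // placeholder URL
      allowDragAndDrop={true}
    />
  );
}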
Once you make the folder, you can find it by using your Control Panel's file manager or the file browser in your FTP program. In XCode, in the project navigator, select your project. files[0]holds the actual file and its details. react-file-manager. React Native ; Bootstrap file-manager examples. Download the App Center SDK for React Native frameworks provided as a zip file and unzip it. React, Node v12. A dual-pane file manager for Mac, Windows and Linux. Restore on demand Library Manager will restore client-side libraries whenever the libman. html, the rest of the website build in React shows nothing (white page). Set Api path for React. To connect the component with the file system items, assign the Remote File System Provider to the fileSystemProvider property. React File Manger Multi-column File Manager based on react-beautiful-dnd. React JS Developers one. js. Here are some of the best places to find up-to-date information on React and TypeScript: React TypeScript Cheatsheets React is a JavaScript library that aims to simplify development of visual interfaces. svg'; // replace it with your path // Specify your default image import defaultUser from '. json. File Manager. css'; import 'devextreme/dist/css/dx. js under dist/ directory. /. The FileBrowser provides the ability to browse directories and locate a file item. Chocolatey is software management automation for Windows that wraps installers, executables, zips, and scripts into compiled packages. React Chart. npm run build. files[0]holds the actual file and its details. This one is a little different. All of the files shared are under GPL License. Like a photo, pdf or any other file type. 3] - 2020-04-04 VueJS + Laravel, HTML + Laravel Updated. in/building-a-full-st Related Posts: How to download file from server using Angular; Prerequisites. Cronus File Manager Live Demo. Files files will be hosted on the server on a cloud service. js under dist/ directory. Allows creating a Progressive Web Apps built with React and Node. A flexible and beautiful Select Input control for ReactJS with multiselect, autocomplete and ajax support. You can add support for other types by adding an assetExts resolver option in your Metro Bower keeps track of these packages in a manifest file, bower. Please note that bluehost doesn’t upload folder and its content. Most common file types are supported including . Scheduler. Go to the My Media/My Documents/My Photos folder. If you don’t have that file already, you just create a blank file, and put that content into it. js is an open-source JavaScript library that is used for building user interfaces specifically for single-page applications. The KendoReact Upload helps users send files from their file systems to dedicated server handlers which are configured to receive them. Basic usage. Mobile applications definitely offer a greater value to businesses than their mobile website • Fixed Spell checker not working and missing Image Advanced Edit button in Node JS SDK • Fixed Unable to load any images or files Python Flask SDK • Fixed Upload Video not working in Rail SDk • Fixed On opening an uploaded file throws "HTTP status 404-Not Found" in Java SDK • Fixed Unable to upload images in Java SDK • Fixed On opening an uploaded file throws "Template is missing The require syntax described above can be used to statically include audio, video or document files in your project as well. If nothing happens, download GitHub Desktop and try again. xml :This file contain list . 
com and its affiliated web properties is provided "as is" without warranty of any kind. As of Kendo UI R1 2020 SP1 the kendo. Go to the My Media/My Documents/My Photos folder. Click on the Delete button. Material Dashboard React Nodejs . ) Simple Example. 2] - 2020-02-18 React Aaded. react-dom@^16. npm install -g expo-cli Use Expo to create a React Native app that runs on iOS, Android, and web. For the former, there is a library called react-dropzone that is built with React. File Operations. This happened right after updating the code when I tried to upload some . Pass the endpointUrl to the remote file system provider object to specify the Url at which the component can access the file system items. First we need to install the dependencies for React. ui. Note: In the documentation, the package babel-plugin-styled-components is specified, as well as a . This project provides a web file manager interface, allowing you to create your own backend connector following the connector API. 3 - Upgraded React version to 17. However, it is not designed to work with SSR. target. Mvc4. There’s nothing more to add, just check out our demo to get a clear idea of what you can do with it. It’s used for handling the view layer for web and mobile apps. jsx) by right clicking on container folder script => react folder select a file from new items dialog popup and click on Add button. /data. This is where Babel macros come in. 07 August 2019. This prod Nowadays, Node Package Manager (npm) is one of the most demanded gadgets in the web developer tool belt. You can open the Task Manager by 2 options. The value should be an async function that receives a webpack config and eventually returns a webpack config. js on cPanel. Like a photo, pdf or any other file type. Along the way, we will build a massive e-commerce application similar to Shopify using React, Redux, React Hooks, React Router, GraphQL, Context API, Firebase, Redux-Saga, Stripe + more. js on cPanel. bat ) as this generates a few files required by the build (such as SharedAssemblyVersionInfo. With this in place, feel free to open the solution file in Visual Studio or VS Code. Free Web File Thunar is developed by Benedikt Meurer, and was originally intended to replace XFFM, Xfce's previous file manager. React Scheduler Storybook is an open source tool for developing UI components in isolation for React, Vue, and Angular. Ignite UI for React also includes the most complete Microsoft Excel solution and 60+ chart types with interactive panning and zooming, touch support and much more. An online file manager which can be used on its own, or as a plugin for a rich-text editor such as CKeditor, TinyMCE or FCKeditor. png extension) Let’s take a quick look at how to manage those breaking user interactions: to the . Option 1: Type "task" in the search box beside the Start menu, and press Enter when you see the "Task Manager" app. /. export const fileItems = [{ 'name': 'Documents', 'isDirectory': true, 'category': 'Work', 'items': [{ 'name': 'Projects', 'isDirectory': true, 'category': 'Work The Custom File System Provider allows you to implement custom APIs to handle file operations (add, delete, rename, etc. Initially, the selectedFilestate is set to null Next time you’re looking for a file, it’s just a click away in the file manager. This will add the LibraryManager NuGet package (Microsoft. . The Edit screen with option to select one or more files is displayed. js) as shown below. 
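The react-dropzone library named above wraps this drag & drop handling in a hook. A minimal sketch of an image dropzone using its documented useDropzone hook; the prompt texts are arbitrary:

import React, { useCallback } from 'react';
import { useDropzone } from 'react-dropzone';

export function ImageDropzone() {
  const onDrop = useCallback((accepted: File[]) => {
    console.log('dropped:', accepted.map(f => f.name));
  }, []);
  const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop });

  return (
    <div {...getRootProps()}>
      <input {...getInputProps()} />
      <p>{isDragActive ? 'Drop the files here' : 'Drag files here, or click to select'}</p>
    </div>
  );
}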
Webix suggests a ready-made solution, which is JS File manager, that can be built into any web application. Fileside Modern, tiling file manager with unlimited panes. I have a demo on Laravel + React. React, Redux, Material UI, Nodejs, ExpressJs . We use Gatsby with TypeScript for this website, so that can also be a useful reference implementation. module folders. Work fast with our official CLI. To select a file or folder: 1. log(event. Store the file in state, and only upload when a user clicks the upload button. wav, . You can then use the Dropzone component to render the HTML5 Drag What we would like to see from a project manager is the following: - A candidate that can manage: 1 - Experience with React context api . 90/5. Declaration files. It come with unlimited customized email with your domain. Looking for the best react native app development companies? Here is the list of the top React native developers with reviews by ADA. To select a specific file, you need to use the number assigned to it. Angle - Responsive Bootstrap Admin Template. Use it as a child component of you application. As with any programming problem, there are many ways to achieve this outcome. Run npm install and npm start after that. For initialising file manager you have to install and run both of them from terminal with commands . 3. 5. react-files. /. A predictable state container for JavaScript apps. xcodeproj file. JSX Now, we need to create a first component to create a file (. A simple file manager built with react. Client implementation is an npm package which can be embed into your application. Grouping by features or routes One common way to structure projects is to locate CSS, JS, and tests together inside folders grouped by feature or route. yarn add react yarn add react-dom yarn add --dev parcel-bundler. 11, React 16/17. The File Manager is a graphical user interface component used to manage the file system. mp4, . Delete a file. Web. expo-file-system ( docs) expo-media-library ( docs) After you’ve done that we can proceed. Create shortcuts for files: Hold SHIFT and move a file with drag & drop to another folder in order to create a shortcut There are several possible ways of using Webix with React: using a Webix widget in a React app; creating a custom Webix+React component; using a Webix widget with Redux; How to Start. Option 1: Package Manager. This article explains a simple way to implement the approach to upload a single file with React. Software Package Manager. View demo Download Source. Say “MORE OPTIONS” 3. Free . You can assign custom color to every folder and tag, which makes the visual search an easy step. - Fixed minor bugs. net/` // Or you React File Manager Usage (iOS) First you need to install react-native-file-manager: npm install react-native-file-manager --save. " File Manager. Step 8: Configuring AVD Manager. On the backend, we are going to use Laravel’s Storage API to store images. Now, you can start adding Essential JS 2 File Manager component to the application. 3. All basic file handling mechanisms like upload, download, read, edit, delete, search, and sort can be performed to manage and organize the files and folder in a file system. ly/3d8cXTx To learn more about the react-native visit: The FileManager UI component can work with a file system located on the server. Angular React Vue jQuery PeaZip is a free archiver tool. config. 
com is looking for React JS Developers for our team in Delhi/NCR (India) Employment: Permanent Employment Place of Work: Delhi/NCR (India) CTC: Best in the industry Role. Complete file and folder manager: Create, rename, move and delete a folder. Reference React. 0 / scheduler@^0. Use the fileSystemProvider property to configure the component's file system provider. Video-React is a web video player built from the ground up for an HTML5 world using React library. Let’s begin with the Redux side of things: The Redux Code Unlike the other frameworks covered in this module, React does not enforce strict rules around code conventions or file organization. Free Frontend Preset For Nodejs . That's when we got the idea to create an orthodox WEB file manager, working on the server's site, which would be able to copy between different sources with server speed and would offer: file and directory search, a disk usage analyzer (an analogue of ncdu), simple file uploading and a lot of other great stuff. In traditional HTML sites, the file upload form forces a page refresh, which might be confusing to users. Chocolatey integrates w/SCCM, Puppet, Chef, etc. The React File Manager component allows for the easy uploading and downloading of files in a Sorting. KFM – Kae’s File Manager. Webix File Manager is a ready-made SPA. " For example, I prepare a page “Commercial files” where I will put a shortcode corresponding to the folder of files uploaded in File Manager or Google Drive. React was first created by Jordan Walke, a software engineer working for Facebook. Web The JavaScript Client Library for Azure Storage enables many web development scenarios using storage services like Blob, Table, Queue, and File, and is compatible with modern browsers. npm install npm run start Design. thumbnail support for image files; built-in media player; text editor; many other features. I want to do a very simple file explorer in react that look like the one of Files for google. Multi-Selection. js, and Mongo. React Shopping Cart. More Template Epic React - HR Management Admin Template is High Resolution: Yes, Compatible Browsers: Firefox, Safari, Opera, Chrome, Edge, Compatible With: ReactJS, Bootstrap 4. These first have been selected by most active users and ranking has been given based on the most popular votes. So follow the below setups:- 1) Install the @react-native-community/checkbox package like below in your application 2) Link the module in your application 3) Import Get code examples like "usenavigate react" instantly right from your google search results with the Grepper Chrome Extension. 6. Disclaimer: The information provided on DevExpress. 0/v14. css'; import FileManager from 'devextreme-react/file-manager'; import { fileItems } from '. Since CKEditor 4. In the process the compiler strips away all function and method bodies and preserves only the signatures of the types that are exported. You have to manually create sub-folder, then upload files into that folder. React Filemanager Hello ex angular-filemanager user, this is the new version in React. The application provides an unified, natively portable, cross-platform file manager and archive manager GUI for many Open Source technologies like 7-Zip, FreeArc, PAQ, UPX. But first, here are the benefits of hosting your React. The ASP. It is developed by laborasyon on ThemeForest. In this tutorial, we will upload an image from the react js component. 80/5. 
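Picking up "the Redux side of things" mentioned above: the file-caching part of such a store can be a small slice of actions plus a reducer. This is a sketch under assumed action names (FILE_CACHED, FILE_EVICTED) and state shape, not the original tutorial's code.

// Maps a remote URL to the local path where its contents are cached.
interface FileCacheState {
  [url: string]: string;
}

type FileCacheAction =
  | { type: 'FILE_CACHED'; url: string; localPath: string }
  | { type: 'FILE_EVICTED'; url: string };

function fileCacheReducer(
  state: FileCacheState = {},
  action: FileCacheAction,
): FileCacheState {
  switch (action.type) {
    case 'FILE_CACHED':
      return { ...state, [action.url]: action.localPath };
    case 'FILE_EVICTED': {
      const { [action.url]: _removed, ...rest } = state;
      return rest;
    }
    default:
      return state;
  }
}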
React is an open-source JavaScript library developed by Facebook used for creating web frontend and UI components. 14. File Manager and Core Data: Used to save photo, video, audio, and pdf data to the ios device url sessions: Used to communicate with the server to upload the data to the Utah State Geographical Cuba admin is super flexible, powerful, clean & modern responsive bootstrap 5 admin template with unlimited possibilities. Or if you have the optional Yarn package manager installed. React & JavaScript articles. The File Manager component supports multiple selections of files and folders in a file system. Storybook - GitHub Pages angular-filemanager. It is worth noting the beautiful design, and a ready-made set of icons, which are included in the delivery. 10. Work fast with our official CLI. Beside Material-UI, we also integrated, with the same design style, over 80 React widgets and plugins. Click on the Next button you will see a System React Fixed. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. NullPointerException; TypeError: string indices must be integers – Python; valueerror: setting an array element with a sequence – Python; TypeError: a bytes-like object is required, not ‘str’ – Python Drop files, select on filesystem, copy and paste files, or add files using the API. To start the app server it will display live changes (optional) 4. So, let's start! Background: within DCE, a GUI screen for uploading and managing files was needed. React Scheduler Disclaimer: The information provided on DevExpress. The issue with this is that, because we’re using create-react-app, we can’t configure a lot of things unless we eject. This package supports multiple file selection and cloud storage integration. Creating a file upload component is a common task in web development. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. At least two fields must be present in the definition file: name and version. 0] – 2020-11-28 HTML, HTML + Laravel ADDED All-new design based on UI/UX principles New Bordered & Dark layout New eCommerce Dashboard Invoice Bamburgh React Admin Dashboard with Reactstrap PRO is built entirely on React and uses the popular starter kit Create React App from Facebook. Now my APP only shows that green circle button located in index. e. Just be sure to follow the installation instructions for “bare” or plain react-native apps. json file. Build files will be created build. First, we install dependencies using npx and then download the Laravel project. 4. Download Epic React – HR Management Admin Template nulled from the below download links and if the item satisfies you then buy it from the developer puffintheme for commercial use. - Minor fixes of RTL SCSS. npm install --save react npm install --save react-dom npm install --save-dev parcel-bundler. js - TS docs; Gatsby - TS Docs; All of these are great starting points. Finally, what all this was leading up to, opening that index. js in your Greg Fodor - Engineering Manager Mozilla The development team involved have been very impressed by the React Admin framework and it has been capable of handling the complex challenges we have had for it thusfar.
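On the note above that a package definition file needs at least name and version: a minimal valid package.json is just those two fields. The values here are placeholders.

{
  "name": "react-file-upload",
  "version": "1.0.0"
}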
It uses React framework and supports connectors to different file storages. Drag & Drop your files in folders: Drag & Drop and image to move it into a folder, where you can find other files. File Manager: Admin can import/export & upload new files. packages. Simple event handlers are also provided as props to the browser, which allow it to respond to actions on the files. Thus you will get an example of integration usage. onChangeHandler=event=>{ console. . It has a large UI collection. dll. onChangeHandler=event=>{ console. import React from 'react'; import ReactDOM from 'react-dom'; import { FileManager, FileNavigator } from '@opuscapita/react-filemanager'; import connectorNodeV1 from '@opuscapita/react-filemanager-connector-node-v1'; const apiOptions = { connectorNodeV1. json file is saved. API-first CMS. json file and choose “Enable Restore on Build”. FileManager also performs operations like creating a new folder, moving files, and searching. This control is part of the Telerik UI for ASP. Created from revision f160547f47 on 12/4/2020. All APIs that implement access to Azure Blob Storage on the client are stored in the azure-file-system. banzay/friends-app-redux Second take on friends app. 4 - Creating RESTful services with Package Manager stores application information in three files, located in /data/system. Is the Excel Viewer widget compatible with the Webix community (opensource) edition? PHP & MySQL Projects for $2 - $10. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8192eu_new. d. Select the file to upload from the file selector dialog box; Downloading a file. pdf. NET Core FileManager lets your users browse through directories and files, akin to file managers like Windows Explorer, and manage file storage within their web applications. spatie/laravel-medialibrary Released: August 2015 Installs: 178 000 Last update: May 2017 (1 day ago). Note the command dotnet new react; this is the template I’m using for this React project. Your domain will look in this ‘public_html’ folder for a top ‘index. You'll see a folder named AppCenterReactNativeShared which contains a single framework for the required React Native iOS bridge. Be it a web-based gaming experience where you store state information in the Table service, uploading photos to a Blob account from a Mobile app, or an entire CodeSandbox at its core consists of two parts: the editor and the preview. Predefined connectors are: Client React connector for Server Node API v1 Localization in React FileManager component The file manager can be localized to any culture by defining the texts and messages of the file manager in the corresponding culture. babelrc file present in the application root folder. jpeg extension) Uploading an image where the file extension has been intentionally changed and Cloudinary could process it, but the DOM could not render the file (eg. Web based File Manager Manage files online From within the free control panel, an easy to use File Manager helps you to upload files, download files or even edit HTML, PHP or other programming language files. 2. The content of package. Create a new file called manager. All basic file operations like creating a new folder, uploading and downloading of files in the file system, and deleting and renaming of existing files and folders are available in the file manager component. Used technologies. 
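The connector-based setup described above pairs a client-side file manager component with a server-side API it talks to over HTTP. As a hedged sketch of that architecture, independent of any one library, a connector can be reduced to a tiny typed client; the endpoint paths are assumptions:

// Minimal connector shape: the UI component calls these methods and never
// touches the storage backend directly.
interface FileManagerConnector {
  list(folderId: string): Promise<string[]>;
  remove(fileId: string): Promise<void>;
}

function makeHttpConnector(apiRoot: string): FileManagerConnector {
  return {
    list: async (folderId) => {
      const res = await fetch(`${apiRoot}/folders/${folderId}/children`); // hypothetical route
      return res.json();
    },
    remove: async (fileId) => {
      await fetch(`${apiRoot}/files/${fileId}`, { method: 'DELETE' }); // hypothetical route
    },
  };
}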
askwon/Filet-Manager Web-based file transfer client written in React, Redux, and Go; ayxos/react-cellar Typescript, MongoDb, Webpack, EC6, Typings, Redux Wine-Cellar; azu/read-all-later [Electron] Read All Later is a client for Pocket. - Added New Auth pages. To run the service, create an Amazon S3 account and a S3 bucket and then register your Amazon S3 client account details like bucketName, awsAccessKeyId, awsSecretKeyId and awsRegion in the RegisterAmazonS3 method to perform the file operations. 9. 5. Angle is an admin template based on Bootstrap and multiple frameworks. Personalize your React grid with flexible API. Organizing your blog media files with the Real Media Library plugin is as easy as dragging and dropping them into folders. JavaScript File Manager or in other words File Explorer is a web widget, part of the UI framework for managing files. Developed at Facebook and released to the world in 2013, it drives some of the most widely used apps, powering Facebook and Instagram among countless other applications. light. . Here native file viewer means we are not going to view the file in our application; instead we will pick the file from the file picker and pass the file URL to the FileViewer component provided by react-native-file-viewer, and this component will trigger the native iOS/Android file viewer to open the file. Say “MORE OPTIONS” 3. ). To delete one or more files, 1. To download a remote file’s content to a local file on the device, here’s the code: Hi Dev, In this blog, I will show you how to install the file manager package in a Laravel application.
Install Step 1: npm i react-native-azure-blob-storage-manager --save
Step 2, Dependencies: npm install --save react-native-background-upload
iOS: cd ios && pod install
Manual Installation Installation. Node. Use Git or checkout with SVN using the web URL. Page 16. Documentation. We are going to use react-dropzone to build an image uploader. Hello ex angular-filemanager user, this is the new version in React. react file manager
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-simEnts.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-simEnts.txt
new file mode 100644
index 0000000000000000000000000000000000000000..600b501136a12f398d7b5d1d04cd146a9e6d4d6e
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager-simEnts.txt
@@ -0,0 +1 @@
+react file manager
All components included in this dashboard template have been developed to bring all the potential of HTML5 and Bootstrap plus a set of new features (JS and CSS) ideal for your next dashboard admin theme or admin web application project. It should be extensible, even for external projects. Description. Redux helps you write applications that behave consistently, run in different environments (client, server, and native), and are easy to test. It has a large UI collection. dll. onChangeHandler=event=>{ console. . import React from 'react'; import ReactDOM from 'react-dom'; import { FileManager, FileNavigator } from '@opuscapita/react-filemanager'; import connectorNodeV1 from '@opuscapita/react-filemanager-connector-node-v1'; const apiOptions = { connectorNodeV1. json file and choose “Enable Restore on Build”. FileManager also performs operations like creating a new folder, moving files, and searching. This control is part of the Telerik UI for ASP. Created from revision f160547f47 on 12/4/2020. All APIs that implement access to Azure Blob Storage on the client are stored in the azure-file-system. banzay/friends-app-redux Second take on friends app. 4 - Creating RESTful services with Package Manager stores application information in three files, located in /data/system. Is the Excel Viewer widget compatible with the Webix community (opensource) edition? PHP & MySQL Projects for $2 - $10. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8192eu_new. d. Select the file to upload from the file selector dialog box; Downloading a file. pdf. NET Core FileManager lets your users browse through directories and files, akin to file managers like Windows Explorer, and manage file storage within their web applications. spatie/laravel-medialibrary Released: August 2015 Installs: 178 000 Last update: May 2017 (1 day ago). Note the command dotnet new react; this is the template I’m using for this React project. Your domain will look in this ‘public_html’ folder for a top ‘index. You’ll see a folder named AppCenterReactNativeShared which contains a single framework for the required React Native iOS bridge. Be it a web-based gaming experience where you store state information in the Table service, uploading photos to a Blob account from a Mobile app, or an entire CodeSandbox at its core consists of two parts: the editor and the preview. Predefined connectors are: Client React connector for Server Node API v1 Localization in React FileManager component The file manager can be localized to any culture by defining the texts and messages of the file manager in the corresponding culture. babelrc file present in the application root folder.
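The react-native-file-viewer flow described above is a single call once you have a local path. A sketch using its documented FileViewer.open entry point; the path argument is a placeholder:

import FileViewer from 'react-native-file-viewer';

async function openWithNativeViewer(localPath: string): Promise<void> {
  try {
    await FileViewer.open(localPath); // e.g. a downloaded PDF's local path
  } catch (err) {
    // thrown when no installed app can handle the file type
    console.warn('Unable to open file', err);
  }
}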
To include the File Manager component in application import the FileManagerComponent from ej2-react-filemanager package in App. 1 - 28 November 2020 ----- - Upgraded Bootstrap version to 4. Filemanager with React & Nodejs . prod. The download manager handles HTTP connections, monitors connectivity changes, reboots, and ensures each download completes successfully. I would like this shortcode to be dynamic, i. Grab the demo from Github if you haven't done this yet. Mobile applications definitely offer a greater value to businesses than their mobile website In this tutorials we will use a package named @react-native-community/checkbox to add checkboxes in react native. Express your opinions freely and help others including your future self You can customize Storybook's webpack setup by providing a webpackFinal field in . 4. 3. import React from 'react'; import 'devextreme/dist/css/dx. js, Express and TypeScript. Install the React components and choose a theme that suits your needs. target. 2 - Ability to translate Wireframes and PSD Designs into functional web apps using HTML5, React , Node. In this tutorial you will learn how to create a working file upload component with react from scratch using no dependencies other than react itself. 2/6. /. You can rearrange the order of your files by dragging them around to move the important files to the top of the list for faster access. Run the Drupal Page having React Nested modals aren’t supported, but if you really need them the underlying react-overlays can support them if you're willing. Download the corresponding App Center SDK for iOS frameworks provided as a zip file and unzip it. NET Core suite along with 100+ fully-featured UI components designed to speed up delivery & improve every aspect of target. You’ll see a plus symbol to the left of the file or folder. com and its affiliated web properties is provided "as is" without warranty of any kind. An electron based file manager. The file manager application is like the heart of a smartphone. Modal's "trap" focus in them, ensuring the keyboard navigation cycles through the modal, and not the rest of the page. Async uploading with AJAX, or encode files as base64 data and send along form post. Overview of Kendo UI FileManager; Sort in Kendo UI FileManager; Toolbar Commands in Kendo UI FileManager Express your opinions freely and help others including your future self I am a beginner in react. LibraryManager. Developed with the latest jQuery plugins. html and . Complete file and folder manager: Create, rename, move and delete a folder. It's very important for me your collaboration on my development tasks and time. Test your JavaScript, CSS, HTML or CoffeeScript online with JSFiddle code editor. Go to react_code\src and change the apiUrl inside config. js as per your current url of Drupal. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8192ee_new. Say “EDIT MODE”. 1. To enable profiling in production mode, modify Webpack configuration file (config/webpack. To download and start utilizing Syncfusion's Essential Studio for React components, see our pricing model. A simple file manager built with react. azurewebsites. 0. Vue. Updated laravel 7 to all full version and starter-kit; React Fixed. mp4 videos to the server. Learn to build modern web applications using Angular, React & Vue! File Upload Component with Vue. A file input (dropzone) management component for React. 
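Tying together the DevExtreme fragments above (the devextreme-react/file-manager import and the fileSystemProvider/endpointUrl wiring), here is a hedged sketch; the endpoint URL is a placeholder and the remote-provider import path follows DevExtreme's documented module layout:

import React from 'react';
import FileManager from 'devextreme-react/file-manager';
import RemoteFileSystemProvider from 'devextreme/file_management/remote_provider';

// The provider issues the component's file operations against the given endpoint.
const fileSystemProvider = new RemoteFileSystemProvider({
  endpointUrl: 'https://example.com/api/file-manager', // placeholder
});

export default function App() {
  return <FileManager fileSystemProvider={fileSystemProvider} />;
}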
TIP: If you have never seen a dot file (a file starting with a dot) it might be odd at first because that file might not appear in your file manager, as it’s a hidden file. Unlike vanilla Bootstrap, autoFocus works in Modals because React handles the implementation Free download Filedash – File Manager Dashboard Nulled. Maybe later I can have a button to have the grid view File-Manager development specification. Introduction: this document is a development specification produced to help readers understand the finished product, not for handover purposes. Say for instance that you want to open the file select dialogue for a user to select a file to upload. This sample demonstrates how to utilize the Amazon S3 file system provider to manage the files in File Manager component. Deploy Trillo File Manager from GCP Marketplace In this tutorial we will use a package named @react-native-community/checkbox to add checkboxes in react native. Choose a device definition, Nexus 5X is suggestable. Create a new project with React Native. React also allows us to create reusable UI components. Use Git or checkout with SVN using the web URL. bs4 File Manager. v6. js . Source code: https://bit. Learn more . log(event. Also, you might want to customize the look of the file input in the form to make it resonate with your overall app design. Chocolatey is trusted by businesses to manage software deployments. Conclusion Let’s work that out. Scheduler. svg'; // replace it with your path // Profile upload helper const HandleImageUpload = => { // we are referencing the file input const imageRef = useRef(); // Specify the default image const [defaultUserImage React Filemanager. Download Nulled Filedash – File Manager Dashboard. 1 - Added new Scrollable layout. x, Columns: 4+. FTP Access Upload files via FTP Need easier and faster way to upload and download.
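For the scenario above (opening the file select dialogue from your own UI rather than from the standard input element), the usual React trick is a hidden input whose click is forwarded from a styled button:

import React, { useRef } from 'react';

export function FilePickerButton() {
  const inputRef = useRef<HTMLInputElement>(null);

  return (
    <>
      <input
        ref={inputRef}
        type="file"
        style={{ display: 'none' }}
        onChange={event => console.log(event.target.files?.[0])}
      />
      {/* any styled link or button can stand in for the native control */}
      <button onClick={() => inputRef.current?.click()}>Choose a file</button>
    </>
  );
}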
It is fully responsive, built with Bootstrap 4 Framework, HTML5, CSS3 and SCSS. The editor is the whole CodeSandbox application (file manager, code editor, dependency settings) and the preview is the result you see on the right. Extension for Visual Studio Code - Simple extensions for React, Redux and Graphql in JS/TS with ES7 syntax Another file format that uses the MD file extension is Moneydance Financial Data. import React, { useEffect, useRef, useState } from 'react'; // Specify camera icon to replace button text import camera from '. - Added Blog List, Blog Grid, Blog Details pages. Site Navigation and Layout. React Native This is an exact mirror of the React Native project, A lightweight and easy-to-use password manager Clonezilla. All the operating systems got a file manager to filter the required files. Create React App – How to Create and Deploy a React Application to Production. and we can drill down into various modules. 5. 0 To do so, right-click the libman. By default, Storybook's webpack configuration will allow you to: Import Images and other static files Semantic UI React provides React components while Semantic UI provides themes as CSS stylesheets. mov, . (Ex – Facebook, Twitter and Google. Then add the File Manager component as shown in below code example. Web. View . This is one of the admin tools that our customers manage their static files on shared host. Initially, the selectedFilestate is set to null The FileBrowser dialogs consist of a FileBrowser object, an object previewer/property manager and a file uploader tab. Let’s install Bootstrap and React. ” - [source] You can delete the files from My Media, My Documents or My Photos folders. Managing your React. Site Navigation and Layout. This can be done in one of two ways: Run bower install --save for each package (the --save flag adds the dependencies (name and version) to the bower. fThe file is called : "FirstReactApp. bs4 File Manager. A partition and disk Adds React debugging tools to the Chrome Developer Tools. This is a Sample React Plugin for Apache OODT File Manager. The FileBrowser dialogs consist of a FileBrowser object, an object previewer/property manager and a file uploader tab. Thunar is designed to start up faster and be more responsive than some other Linux file managers, such as Nautilus and Konqueror. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8812ae_new. This is an example file with default selections. new file manager windows 10 Executive Summary These course materials were originally designed for Google managers to help them transition from an individual contributor role to a manager role. jsx". dll and React. filebrowser provides a file managing interface within a specified directory and it can be used to upload, delete, preview, rename and edit your files. Files. json must be written in JSON. I will try to make it clean and retro-compatible with the previous bridges/connectors It's very important for me your collaboration on my development tasks and time. They are not part of the template and NOT included in the final purchase files. So in the above imports, the files would be CartTotal. In XCode, in the project navigator, right click Libraries Add Files to [your project's name] Go to node_modules react-native-file-manager and add the . LibraryManager. You can go for either an HTML5 drag & drop file uploader or use the traditional way. json file is saved. I want somebody who can redo what has been done and finish it. On the file manager for a domain you have a ‘public_html’ folder. 
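The editor/preview split described above, where the two decoupled halves only communicate using postMessage, reduces to the standard window messaging API. A sketch with an assumed message shape; a real implementation would pin the target origin instead of '*':

// Editor side: push updated sources into the preview iframe.
const frame = document.querySelector<HTMLIFrameElement>('iframe#preview');
frame?.contentWindow?.postMessage(
  { type: 'code-update', files: { 'index.js': 'console.log(1)' } },
  '*',
);

// Preview side: apply updates as they arrive.
window.addEventListener('message', (event: MessageEvent) => {
  if (event.data?.type === 'code-update') {
    // re-evaluate the received files here
  }
});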
dll (if using MVC 4) in your Web Application project Your first build always needs to be done using the build script ( dev-build. bat ) as this generates a few files required by the build (such as SharedAssemblyVersionInfo. cs ). ) Wrap long file names in the File Manager’s detail view Customize icons in the Folder Tree. com, lands you with the opportunity of working with a leading technology organization. v 2. You Install react-file-reader (A flexible ReactJS component for handling styled HTML file inputs. files[0]) } On saving, create-react-app will instantly refresh the browser. babelrc configuration file. Looking for the best react native app development companies? Here is the list of the top React native developers with reviews by ADA. Unfortunately it can be quite intimidating. if I add/remove files in File Manager, it will react dynamically on the front-side (so I don’t need to modify the shortcode or put a React is a popular open-source JavaScript library – many of you asked for an easier integration between Power BI and React web applications. Free bootstrap snippets, examples and resources tagged with file-manager, html, css and js. Themes and Skinning JavaScript - jQuery, Angular, React, Vue React Data Grid. Please help me to move forward with a donation by paypal :) The file manager component is used to browse, manage, and organize the files and folders in a file system through a web application. js. js file to a . How can I create a custom command for the Kendo UI File Manager? Creating a Custom Command. Restore on demand Library Manager will restore client-side libraries whenever the libman. json file is saved. It allows the creation of multiple users and each user can have its own directory. In this tutorial we are going to create a task manager application from scratch with react. Once a file item is selected, it (or its properties) is loaded in the previewer. The DevExtreme JavaScript FileManager component allows you to display and manage files and directories for different file systems. To make the functions work as expected, I transpile these into CommonJS format in addition to transpiling React JSX files. I just hope this is helpful. It's a command-line utility connected with the corresponding online repository of packages and is capable of package installation, version management, and dependency management. Step 9: Configuring AVD Manager. This time with Trillo File Manager is an application for Dropbox-like functionality on the top of the GCS cloud storage bucket. However, you don’t want to use the standard file input HTML element; instead use a styled link or button to show the file window. 3 - Binding of UI elements to JavaScript object models. 2. React is one of the best choices for building modern web applications. js file. Default configuration. This is a Sample React Plugin for Apache OODT File Manager.
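On the custom-command question above: the text states that the kendo.ui.filemanager namespace exposes a FileManagerCommand class that can be extended. A hedged sketch of that shape follows; registration details vary by Kendo UI version, and the global kendo object is assumed to come from the Kendo scripts.

declare const kendo: any; // provided globally by the Kendo UI bundle

const MyCustomCommand = kendo.ui.filemanager.FileManagerCommand.extend({
  exec: function () {
    // act on the current selection here
    console.log('custom command executed');
  },
});

// Registering under the commands namespace makes it addressable from the toolbar.
kendo.ui.filemanager.commands.MyCustomCommand = MyCustomCommand;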
React has a slim API, a robust and evolving ecosystem and a great community. If nothing happens, download Xcode and try again. com and its affiliated web properties is provided "as is" without warranty of any kind. After downloading the installation file of it, double click on it and proceed with the installation. The KendoReact Upload component is part of the KendoReact library of React UI components. common. Say “EDIT MODE”. json React Component by Creating. You can delete the files from My Media, My Documents or My Photos folders. The new React component supports both JavaScript and TypeScript and will help you embed your analytics in a React web application. Our file caching system will have two main parts. Tailor fman to your needs with its powerful plugin system. React can handle a single button, a few pieces of an interface, or an app's entire user interface. js. Requirements Creating a File Upload Component with React. react-native-azure-blob-storage-manager. npm install react-files --save Usage Basic I don't think there is one, but it's such a strange question, React is used on the web as a frontend library, while a file manager runs on your local computer. Then use the Axios library to send the file request to the Laravel server and saves the image in the server. ReactOS is a free and open-source operating system for x86/x64 personal computers intended to be binary-compatible with computer programs and device drivers made for Windows Server 2003. Component { render() { return ( ); } } export default App; File Manager can be initialized using the tag. npm start To create a new build inside dist directory. Disclaimer: The information provided on DevExpress. Download Manager is a system service which optimizes the handling of long-running downloads in the background. The first is a React component, which will wrap around RNFetchBlob’s functionality and respond to changes in the Redux store. This method of deleting corrupted files requires you to close "Windows Explorer" through "Task Manager". json. A list of its key features is given below. The MD file stores transactions, budgets, stock information, bank accounts, and other related data for the Moneydance finance software. The second is a set of actions and reducers on the Redux store which deal specifically with file caching. apiOptions, apiRoot: `http://opuscapita-filemanager-demo-master. Integrate TinyMCE editor in Laravel with a File Manager / Image Upload Jquery PHP July 27, 2019 2,006 views Create Sortable, drag and drop multi-level list with jquery like wordpress menu page All Webix widgets and applications function well in the Angular or React environment. Today we will create File Manager App UI with animation using React Native. Sweet Alert in dark layout; Design Files Removed. Installation. Free bootstrap snippets, examples and resources tagged with file-manager, html, css and js. . dll. When a TypeScript script gets compiled there is an option to generate a declaration file (with the extension . You have high end protection; It also has a file manager that is easy to access. With React, you can create reusable components that are independent of each other. Use the Download button in the toolbar. The following table represents the default texts and messages of the file manager in en culture. This project based course will introduce you to all of the modern toolchain of a React developer in 2020. For example, users can upload images, videos, etc on Facebook, Instagram. Source + Demo. 
Data List; React DataTable Component Vue based front-end for File Manager Aug 01, 2018 1 min read. To associate your repository with the react-electron topic, visit your repo's landing page and select "manage topics. . 6. Benefits of Hosting React. This will add the LibraryManager NuGet package (Microsoft. Add events to precisely control file/folder operations (folder creation, file uploading, moving, deleting, etc. 6. Go through the following steps for creating React project to download file from server using React. A full list of the compatible frameworks and integration examples you can find on this page . Files and folders in the file system can be sorted in either ascending or descending order simply by npm install --save @opuscapita/react-filemanager @opuscapita/react-filemanager-connector-node-v1. If nothing happens, download GitHub Desktop and try again. import ReactFileReader from 'react-file-reader'; class Because the other files & folders above (some truncated) are usually part of a default react-native init installation, our focus would be on the src folder:. tsx. 5, npm 6. Drag & Drop your files in folders: Drag & Drop and image to move it into a folder, where you can find other files. To configure the AVD Manager click on the respective icon in the menu bar. These first have been selected by most active users and ranking has been given based on the most popular votes. Accessible , tested with AT software like VoiceOver and JAWS, navigable by Keyboard . changing a . target. Install from NPM and include it in your own React build process (using Browserify, Webpack, etc). The project is about uploading a users products/services. The name npm (Node Package Manager) stems from when npm first was created as a package manager for Node. If multiple packages depend on a package - jQuery for example - Bower will download jQuery just once. React. View . Communicating react with asp. holyidiot updated Vuexy - Vuejs, React, HTML & Laravel Admin Dashboard Template with a new update entry: Update [6. 0] – 2020-11-28 Latest Update [6. ts) that functions as an interface to the components in the compiled JavaScript. Added 2021-01-09 file-manager,file-browser spofly Desktop app to find lyrics of currently playing song on spotify. You have two options for creating a file uploader. ReactOS will only be compatible with computers that are compatible with Windows 2003 or XP. ej2-react-filemanager. Basic usage. Any FOSS lover is warmly welcomed React Native ; Bootstrap file-manager examples. Bower provides hooks to facilitate using packages in your tools and workflows. jsx" Select Web => JSX File, and enter file name "FirstReactApp. The FileManager provides an inbuilt Search functionality, allowing you to find the specific file in the currently selected folder. React File Manager: A Lightweight & Customizable Component File upload and download. File Browser Front-end. Paper Kit ReactOS is a free, opensource reimplementation of windows Related: How to Copy and Paste Text, Files and Folders in Linux Terminal. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. Removed complementary design files from the package [5. Uploading Files using HTML5 Uploader. TagSpaces features basic file management operations, so it can be used as simple file manager. To associate your repository with the react-electron topic, visit your repo's landing page and select "manage topics. 
dhtmlxGrid contains rich API functionality. This is an unparalleled array of features, design elements and reusable components Introduction to Asus file manager. To do so, right-click the libman. /assets/images/camera. Try All UI Components for Free All UI components for React are part of the dhtmlxSuite library. Changing the Webpack config. Add multiple URL to pocket at a time. Create better React apps faster and add data visualizations with the world's fastest, virtualized, real-time React data grid and streaming financial and business charts. File uploading means a user from a client machine wants to upload files to the server. net core on remote server (httpdocs folder) does not work RSS 0 replies Last post 2 hours, 59 minutes ago by fiazahmed An electron based file manager. 10 – WordPress File Manager Using the default WordPress media manager also means that the plugin will be very compatible with all the other plugins you use. It’s not a visual file manager, but it gives a set of functions to easily handle media/files in your Laravel app. html’ file to run on start up. Once a file item is selected, it (or its properties) is loaded in the previewer. Read the full article at: http://bgwebagency. The presence of these handlers enables the buttons and/or the drag & drop responsiveness. (eg. However, newer versions of the program use . The FileManager uses file system providers to access file systems. fastlane/ This folder, as you might React doesn’t have opinions on how you put files into folders. files[0]) } On saving, create-react-app will instantly refresh the browser. No action needed. pdf file to use a . Firebase issue during npm install [5. The standard tool for this task is Babel. The create-react-app utility configures tools such as Babel and webpack for the client-side React application. Build) to the project, which will trigger a restore as part of project build. In this section, we are going to add the Client React component from OpusCapita for navigating the folders and listing the files in our PDF library. nyc_output and coverage folder containing our instrumentation detail. . In terms of frontend frameworks, this React admin dashboard is powered by Material-UI, which is the most popular material-based UI components framework available today. - Added File Manager Page. MONEYDANCE files instead. Blog Post. Page 16. lang. A file input (dropzone) management component for React. Videos: Manage member level settings & Videos created by Users. storybook/main. ) Channels: Manage member level settings & Channel created by Users. It enables the user to perform common file operations such as accessing, editing, uploading, downloading, and sorting files and folders. To upload a file with React and Laravel, create a React file component and backend in Laravel. 2. React Chart. The following sample is extracted from Android 4 ICS emulator image. Similarly, every smartphone has a file manager to view, edit, and create any text files, delete, sort, or rename, copy, and cut whenever required. All npm packages are defined in files called package. ). The official front-end framework for building experiences that fit seamlessly into Microsoft 365. Create shortcuts for files: Hold SHIFT and move a file with drag & drop to another folder in order to create a shortcut Bootstrap snippets. 1. js file. To select a specific file, you need to use the number assigned to it. See bundler defaults for the full list. Bower is optimized for the front-end. 15. React. 
This allows teams to set conventions that work best for them, and to adopt React in any way they would like to. It also supports uploading a file by dragging it from Windows Explorer to FileManager control. Cezerin is open-source ecommerce platform. Tailor your React grid component according to your needs. dll GitHub - networknt/react-file-manager: A react remote file manager with Light Framework as back end file system. Social Sites Integration: With one click, you can login to your site using Social Sites. Using the arrow keys, move over the desired file or folder and press Space on the keyboard. 9 all file uploads, including those initiated by the File Browser plugin, expect a JSON response (like this one ). In addition to building skills, this curriculum incorporates introspection, perspective shifting, and awareness building. These two parts are very decoupled and only communicate using postMessage. Plugin has two parts: Front-end & Back-end. Note that your changes would be temporary and will not persist between re-runs of your package manager. Other Feature Module in React template: Voice and Video Call File Manager Contacts and Email Departments and Designations Timesheet and Overtime Kanban Board Payroll, Payslip and Payrun Company Policies Performance, Goal Tracking, Training and Promotion Resignation and Termination Faq and Knowledgebase Profile Settings, Profile and Edit Profile 🎈 React Material-UI. August 08, 2018. Store the file in state, and only upload when a user clicks the upload button. Overview. This package help you to upload file and assests from react native project to your azure blob storage service. If you want to download large files/streaming you can use Android Download Manager. React components can be installed via yarn or npm: After install, import the minified CSS file in your app's entry file: File manager built with the help of Suite components: Layout, Grid, DataView, Toolbar, etc. js doesn’t have to be hard and with these few steps, you can do it yourself. js'; class App extends React. Use npm to install the Expo CLI command line utility from the Windows Command Prompt, PowerShell, Windows Terminal, or the integrated terminal in VS Code (View > Integrated Terminal). Clear the cache from admin panel. js , or Total. How you use packages is up to you. html file should reveal our code coverage in a human readable and hopefully revealing way. We will cd into react project react-file-upload – cd react-file-upload Now will install dependencies – npm install bootstrap npm install react-toastify npm install axios The bootstrap help to create ui based on bootstrap 4, react-toastify is use to display beautiful notification into react app and axios for HTTP client. Save time by quickly jumping to directories. CKEditor 4 can be easily integrated with an external file manager (file browser/uploader) thanks to the File Browser plugin which by default is included in the Standard and Full presets. A very smart filemanager to manage your files in the browser developed in AngularJS following Material Design styles by Jonas Sciangula Street. These files will always be rendered/loaded to the page when an instance of the module is on the page (Module instances are the individual rendered modules on the page). FileManager Conclusion To make a file downloadable from your website, start by creating a folder on your server for both your website's HTML page and the file you want to share. Select the file in the manager. 
It is often used for developing Web Applications or Mobile Apps. You can add a custom thumbnail and text description to every file or folder. That said, there are a few common approaches popular in the ecosystem that you may want to consider. Drag and Drop Support in the React FileManager component: the file manager allows files or folders to be moved from one folder to another by using the allowDragAndDrop property. Once you make the folder, you can find it by using your Control Panel's file manager or the file browser in your FTP program. In XCode, in the project navigator, select your project. event.target.files[0] holds the actual file and its details. react-file-manager. React Native and Bootstrap file-manager examples. Download the App Center SDK for React Native frameworks provided as a zip file and unzip it. A dual-pane file manager for Mac, Windows and Linux. Restore on demand: Library Manager will restore client-side libraries whenever the libman.json file is saved. Set the API path for React. React File Manager: a multi-column file manager based on react-beautiful-dnd. Here are some of the best places to find up-to-date information on React and TypeScript: React TypeScript Cheatsheets. React is a JavaScript library that aims to simplify development of visual interfaces. The FileBrowser provides the ability to browse directories and locate a file item. Chocolatey is software management automation for Windows that wraps installers, executables, zips, and scripts into compiled packages. npm run build: build files will be created under the dist/ directory. This one is a little different. All of the files shared are under the GPL License. Like a photo, PDF or any other file type. [5.3] - 2020-04-04: VueJS + Laravel and HTML + Laravel updated. Related posts: how to download a file from the server using Angular; prerequisites. Cronus File Manager live demo. Files will be hosted on the server on a cloud service. Allows creating Progressive Web Apps built with React and Node. A flexible and beautiful Select Input control for ReactJS with multiselect, autocomplete and ajax support. Most common file types are supported, including .mp3, .wav, .mp4 and .mov; you can add support for other types by adding an assetExts resolver option in your Metro configuration. Bower keeps track of these packages in a manifest file, bower.json. Please note that Bluehost doesn't upload a folder and its content; you have to manually create the sub-folder, then upload files into it. If you don't have that file already, you just create a blank file and put that content into it. React.js is an open-source JavaScript library that is used for building user interfaces, specifically for single-page applications. The KendoReact Upload helps users send files from their file systems to dedicated server handlers which are configured to receive them. To connect the component with the file system items, assign the Remote File System Provider to the fileSystemProvider property.
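Reading the two DevExtreme-style sentences together (assign a Remote File System Provider to fileSystemProvider and, as noted further below, pass it an endpointUrl), a minimal sketch could look like this; the endpoint URL is hypothetical, and the import paths follow the DevExtreme package layout as I understand it:

    import React from 'react';
    import FileManager from 'devextreme-react/file-manager';
    import RemoteFileSystemProvider from 'devextreme/file_management/remote_provider';

    // the endpointUrl tells the provider where the server's file-system API lives
    const remoteProvider = new RemoteFileSystemProvider({
      endpointUrl: 'https://example.com/api/file-manager' // hypothetical endpoint
    });

    export default function App() {
      return <FileManager fileSystemProvider={remoteProvider} />;
    }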
Mobile applications definitely offer a greater value to businesses than their mobile websites. • Fixed: spell checker not working and missing Image Advanced Edit button in the Node JS SDK • Fixed: unable to load any images or files in the Python Flask SDK • Fixed: Upload Video not working in the Rails SDK • Fixed: "HTTP status 404 - Not Found" on opening an uploaded file in the Java SDK • Fixed: unable to upload images in the Java SDK • Fixed: "Template is missing" on opening an uploaded file. packages.xml: this file contains the list of installed packages. As of Kendo UI R1 2020 SP1, the kendo.ui.filemanager namespace exposes the FileManagerCommand class that can be extended to implement a custom File Manager command. To delete a file, go to the My Media/My Documents/My Photos folder and click on the Delete button. Material Dashboard React Nodejs. Simple example. [5.2] - 2020-02-18: React added. react-dom@^16. npm install -g expo-cli: use Expo to create a React Native app that runs on iOS, Android, and web. For the former, there is a library called react-dropzone that is built with React. File operations. This happened right after updating the code, when I tried to upload some .mp4 videos to the server. Pass the endpointUrl to the remote file system provider object to specify the URL at which the component can access the file system items. First we need to install the dependencies for React. Note: in the documentation, the package babel-plugin-styled-components is specified, as well as a .babelrc file present in the application root folder. This project provides a web file manager interface, allowing you to create your own backend connector following the connector API. 3 - Upgraded React version to 17. However, it is not designed to work with SSR. There's nothing more to add; just check out our demo to get a clear idea of what you can do with it. It's used for handling the view layer for web and mobile apps. Now we need to create a first component, in a file (.jsx), by right-clicking on the container folder (script => react folder), selecting a file from the new items dialog popup and clicking on the Add button. This is where Babel macros come in. Nowadays, Node Package Manager (npm) is one of the most demanded gadgets in the web developer tool belt. You can open the Task Manager in two ways. The value should be an async function that receives a webpack config and eventually returns a webpack config. Along the way, we will build a massive e-commerce application similar to Shopify using React, Redux, React Hooks, React Router, GraphQL, Context API, Firebase, Redux-Saga, Stripe and more. With this in place, feel free to open the solution file in Visual Studio or VS Code. Thunar is developed by Benedikt Meurer, and was originally intended to replace XFFM, Xfce's previous file manager. Storybook is an open source tool for developing UI components in isolation for React, Vue, and Angular. Ignite UI for React also includes the most complete Microsoft Excel solution and 60+ chart types with interactive panning and zooming, touch support and much more. An online file manager which can be used on its own, or as a plugin for a rich-text editor such as CKEditor, TinyMCE or FCKeditor. The require syntax described above can be used to statically include audio, video or document files in your project as well.
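A minimal sketch of such static includes in React Native; the asset paths are hypothetical, and document types like .pdf may first need an assetExts entry in metro.config.js, as the surrounding text notes:

    import React from 'react';
    import { Image } from 'react-native';

    // Metro resolves these at build time and bundles the assets with the app
    const introSong = require('./assets/intro.mp3');   // hypothetical file
    const userManual = require('./assets/manual.pdf'); // may need assetExts in metro.config.js

    export default function Cover() {
      // Image accepts the result of require() directly as its source
      return <Image source={require('./assets/cover.png')} />;
    }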
Option 1: type "task" in the search box beside the Start menu, and press Enter when you see the "Task Manager" app. Initially, the selectedFile state is set to null. Next time you're looking for a file, it's just a click away in the file manager. This will add the LibraryManager NuGet package to the project. The Edit screen, with the option to select one or more files, is displayed. Webix suggests a ready-made solution, JS File Manager, that can be built into any web application. Fileside: a modern, tiling file manager with unlimited panes. I have a demo on Laravel + React. React, Redux, Material UI, Nodejs, ExpressJs. We use Gatsby with TypeScript for this website, so that can also be a useful reference implementation. To select a file or folder, use the arrow keys and press Space on the keyboard. You can then use the Dropzone component to render the HTML5 drag & drop zone. What we would like to see from a project manager is a candidate that can manage: 1 - experience with the React Context API. Declaration files. It comes with unlimited customized email with your domain. Looking for the best React Native app development companies? Here is the list of the top React Native developers with reviews by ADA. To select a specific file, you need to use the number assigned to it. Angle - Responsive Bootstrap Admin Template. Use it as a child component of your application. As with any programming problem, there are many ways to achieve this outcome. Run npm install and npm start after that. For initialising the file manager you have to install and run both of them (front-end and back-end) from the terminal. react-files. A predictable state container for JavaScript apps. JSX: now we need to create a first component to create a file (.jsx). A simple file manager built with React. The client implementation is an npm package which can be embedded into your application. Grouping by features or routes: one common way to structure projects is to locate CSS, JS, and tests together inside folders grouped by feature or route. yarn add react; yarn add react-dom; yarn add --dev parcel-bundler. The File Manager is a graphical user interface component used to manage the file system. Delete a file. expo-file-system (docs) and expo-media-library (docs): after you've installed those, we can proceed. Create shortcuts for files: hold SHIFT and move a file with drag & drop to another folder in order to create a shortcut. There are several possible ways of using Webix with React: using a Webix widget in a React app; creating a custom Webix+React component; using a Webix widget with Redux. Option 1: Package Manager. This article explains a simple way to implement the approach to upload a single file with React. You can assign a custom color to every folder and tag, which makes visual search an easy step. - Fixed minor bugs. React File Manager usage (iOS): first you need to install react-native-file-manager: npm install react-native-file-manager --save. Step 8: configuring the AVD Manager. An in-memory data source for such a component appears in the source as follows:

    export const fileItems = [{
      'name': 'Documents',
      'isDirectory': true,
      'category': 'Work',
      'items': [{
        'name': 'Projects',
        'isDirectory': true,
        'category': 'Work'
        // snippet truncated in the source

The Custom File System Provider allows you to implement custom APIs to handle file operations (add, delete, rename, etc.).
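For that custom-API case, here is a sketch of a DevExtreme-style CustomFileSystemProvider wired to a hypothetical REST backend; the option names follow DevExtreme's documented custom provider as I recall them, and the /api routes are made up:

    import CustomFileSystemProvider from 'devextreme/file_management/custom_provider';

    const customProvider = new CustomFileSystemProvider({
      // called for each directory the user opens
      getItems: parentDir =>
        fetch(`/api/items?path=${encodeURIComponent(parentDir.path)}`) // hypothetical route
          .then(response => response.json()),

      // called when the user deletes a file or folder
      deleteItem: item =>
        fetch(`/api/items/${encodeURIComponent(item.path)}`, { method: 'DELETE' }),

      // called when the user renames an item
      renameItem: (item, newName) =>
        fetch(`/api/items/${encodeURIComponent(item.path)}?rename=${newName}`, { method: 'PUT' })
    });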
On the backend, we are going to use Laravel's Storage API to store images. Now, you can start adding the Essential JS 2 File Manager component to the application. All basic file handling mechanisms like upload, download, read, edit, delete, search, and sort can be performed to manage and organize the files and folders in a file system. Source code: https://bit.ly/3d8cXTx. The FileManager UI component can work with a file system located on the server. PeaZip is a free archiver tool. one.com is looking for React JS Developers for our team in Delhi/NCR (India). Employment: permanent. Place of work: Delhi/NCR (India). CTC: best in the industry. Complete file and folder manager: create, rename, move and delete a folder. Video-React is a web video player built from the ground up for an HTML5 world using the React library. Let's begin with the Redux side of things. Unlike the other frameworks covered in this module, React does not enforce strict rules around code conventions or file organization. Free frontend preset for Nodejs. That's when we got the idea to create an orthodox web file manager, working on the server's side, which would be able to copy between different sources with server speed and would offer file and directory search, a disk usage analyzer (an analogue of ncdu), simple file uploading and a lot of other great stuff. In traditional HTML sites, the file upload form forces a page refresh, which might be confusing to users. Chocolatey integrates with SCCM, Puppet, Chef, etc. The React File Manager component allows for the easy uploading and downloading of files in a file system. KFM – Kae's File Manager. Webix File Manager is a ready-made SPA. For example, I prepare a page "Commercial files" where I will put a shortcode corresponding to the folder of files uploaded in File Manager or Google Drive. React was first created by Jordan Walke, a software engineer working for Facebook. The JavaScript Client Library for Azure Storage enables many web development scenarios using storage services like Blob, Table, Queue, and File, and is compatible with modern browsers. npm install, then npm run start. Thumbnail support for image files, a built-in media player, a text editor and many other features. I want to do a very simple file explorer in React that looks like the one in Files by Google: just display a list within two predefined tabs (folders). Multi-selection. React Shopping Cart. Epic React - HR Management Admin Template: high resolution; compatible browsers: Firefox, Safari, Opera, Chrome, Edge; compatible with ReactJS and Bootstrap 4.x. To add checkboxes in React Native we will use the @react-native-community/checkbox package: 1) install the package in your application, 2) link the module, 3) import it. In the process the compiler strips away all function and method bodies and preserves only the signatures of the types that are exported. The imports for the DevExtreme File Manager demo are:

    import React from 'react';
    import 'devextreme/dist/css/dx.common.css';
    import 'devextreme/dist/css/dx.light.css';
    import FileManager from 'devextreme-react/file-manager';
    import { fileItems } from './data.js';

Use the fileSystemProvider property to configure the component's file system provider.
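Completing that demo with the class-component shell that appears later in this text ("class App extends React.Component ... export default App"), and assuming the truncated fileItems array from above, binding the in-memory data is a one-liner; passing a plain array to fileSystemProvider makes the component treat it as an object-based (in-memory) file system:

    class App extends React.Component {
      render() {
        // bind the in-memory array; DevExtreme wraps it in an object provider
        return <FileManager fileSystemProvider={fileItems} />;
      }
    }

    export default App;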
React Filemanager: hello ex angular-filemanager user, this is the new version in React. It uses the React framework and supports connectors to different file storages. Drag & drop your files in folders: drag & drop an image to move it into a folder, where you can find other files. File Manager: admin can import/export and upload new files. Simple event handlers are also provided as props to the browser, which allow it to respond to actions on the files. Thus you will get an example of integration usage. onChangeHandler = event => { console.log(event.target.files[0]) }. It has a large UI collection. Right-click the libman.json file and choose "Enable Restore on Build". FileManager also performs operations like creating a new folder, moving files, and searching. This control is part of the Telerik UI for ASP.NET Core suite. All APIs that implement access to Azure Blob Storage on the client are stored in the azure-file-system.js file. banzay/friends-app-redux: a second take on the friends app. 4 - Creating RESTful services. Package Manager stores application information in three files, located in /data/system. Is the Excel Viewer widget compatible with the Webix community (open-source) edition? Select the file to upload from the file selector dialog box. Downloading a file. The ASP.NET Core FileManager lets your users browse through directories and files, akin to file managers like Windows Explorer, and manage file storage within their web applications. spatie/laravel-medialibrary: released August 2015, 178,000 installs. Note the command dotnet new react; this is the template I'm using for this React project. Your domain will look in this 'public_html' folder for a top 'index.html' file to run on start-up. You'll see a folder named AppCenterReactNativeShared, which contains a single framework for the required React Native iOS bridge. Be it a web-based gaming experience where you store state information in the Table service, or uploading photos to a Blob account from a mobile app, the library covers many scenarios. CodeSandbox at its core consists of two parts: the editor and the preview. Localization in the React FileManager component: the file manager can be localized to any culture by defining its texts and messages in the corresponding culture. Uploading an image where the file extension has been intentionally changed, so that Cloudinary could process it but the DOM could not render the file (e.g. changing a .pdf file to use a .png extension): let's take a quick look at how to manage those breaking user interactions. Web-based file manager: manage files online from within the free control panel; an easy-to-use file manager helps you upload, download or even edit HTML, PHP or other programming-language files. Create a new file called manager. Predefined connectors are: the client React connector for the Server Node API v1, set up as follows:

    import React from 'react';
    import ReactDOM from 'react-dom';
    import { FileManager, FileNavigator } from '@opuscapita/react-filemanager';
    import connectorNodeV1 from '@opuscapita/react-filemanager-connector-node-v1';

    const apiOptions = {
      ...connectorNodeV1.apiOptions,
      apiRoot: `http://opuscapita-filemanager-demo-master.azurewebsites.net/` // or your own server's API root
    };
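Rendering then follows the OpusCapita README pattern. This is a sketch only: the id is arbitrary, api and capabilities come from the connector package, and the exact prop set is my recollection of the README, so treat it as an assumption and check the package docs for your version:

    ReactDOM.render(
      <FileManager>
        <FileNavigator
          id="pdf-library"
          api={connectorNodeV1.api}
          apiOptions={apiOptions}
          capabilities={connectorNodeV1.capabilities}
        />
      </FileManager>,
      document.getElementById('root')
    );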
I guess it's technically possible to write a file manager in Node, use React for the UI, and package it as a desktop app with Electron, but I would still not call that "React based". "The development team involved have been very impressed by the React Admin framework and it has been capable of handling the complex challenges we have had for it thus far." - Greg Fodor, Engineering Manager, Mozilla. React can handle a single button, a few pieces of an interface, or an app's entire user interface. Requirements: creating a file upload component with React. react-native-azure-blob-storage-manager. npm install react-files --save; basic usage. I don't think there is one, but it's such a strange question: React is used on the web as a frontend library, while a file manager runs on your local computer. Then use the Axios library to send the file request to the Laravel server, which saves the image on the server. ReactOS is a free and open-source operating system for x86/x64 personal computers intended to be binary-compatible with computer programs and device drivers made for Windows Server 2003. File Manager can be initialized using its tag, inside a component shell such as class App extends React.Component { render() { return ( ); } } export default App;. npm start; to create a new build inside the dist directory, npm run build. Download Manager is a system service which optimizes the handling of long-running downloads in the background. File Manager and Core Data: used to save photo, video, audio, and PDF data on the iOS device. URL sessions: used to communicate with the server to upload the data to the Utah State Geographical service. Cuba admin is a super flexible, powerful, clean and modern responsive Bootstrap 5 admin template with unlimited possibilities. Or if you have the optional Yarn package manager installed. React & JavaScript articles. The File Manager component supports multiple selections of files and folders in a file system. It is worth noting the beautiful design, and a ready-made set of icons, which are included in the delivery. Beside Material-UI, we also integrated, with the same design style, over 80 React widgets and plugins. Click on the Next button. So, let's start! Background: we needed a GUI screen for uploading and managing files within DCE. Integrate the TinyMCE editor in Laravel with a file manager / image upload. Create a sortable, drag-and-drop multi-level list with jQuery, like the WordPress menu page. All Webix widgets and applications function well in the Angular or React environment. Today we will create a File Manager app UI with animation using React Native. Sweet Alert in dark layout. Design files removed from the package. You have high-end protection; it also has a file manager that is easy to access. With React, you can create reusable components that are independent of each other. Use the Download button in the toolbar. The following table represents the default texts and messages of the file manager in the en culture. This project-based course will introduce you to all of the modern toolchain of a React developer in 2020. For example, users can upload images, videos, etc. on Facebook and Instagram. When a TypeScript script gets compiled, there is an option to generate a declaration file (with the extension .d.ts) that functions as an interface to the components in the compiled JavaScript.
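For instance, compiling a small module with the standard --declaration flag of tsc emits a .d.ts file alongside the JavaScript; the file and function names here are illustrative:

    // math.ts -- compile with: tsc --declaration math.ts
    export function add(a: number, b: number): number {
      return a + b;
    }

    // tsc then emits math.d.ts, which keeps only the exported signature:
    // export declare function add(a: number, b: number): number;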
All basic file operations, such as creating a new folder, uploading and downloading files in the file system, and deleting and renaming existing files and folders, are available in the file manager component. Used technologies: askwon/Filet-Manager, a web-based file transfer client written in React, Redux, and Go; ayxos/react-cellar, a TypeScript, MongoDB, Webpack, ES6, Typings, Redux wine cellar; azu/read-all-later, an Electron client for Pocket that can add multiple URLs to Pocket at a time. - Added new Auth pages. To run the service, create an Amazon S3 account and an S3 bucket, then register your Amazon S3 client account details (bucketName, awsAccessKeyId, awsSecretKeyId and awsRegion) in the RegisterAmazonS3 method to perform the file operations. Angle is an admin template based on Bootstrap and multiple frameworks. Personalize your React grid with a flexible API. Organizing your blog media files with the Real Media Library plugin is as easy as dragging and dropping them into folders. JavaScript File Manager, in other words File Explorer, is a web widget, part of the UI framework, for managing files. Developed at Facebook and released to the world in 2013, React drives some of the most widely used apps, powering Facebook and Instagram among countless other applications. Here "native file viewer" means we are not going to view the file inside our application; instead we pick the file from the file picker and pass the file URL to the FileViewer component provided by react-native-file-viewer, which triggers the native iOS/Android file viewer to open the file. To delete one or more files, go to the folder, say "MORE OPTIONS", then "EDIT MODE", and select the files. To download a remote file's content to a local file on the device, see the sketch just after this block. Hi Dev, in this blog I will show you how to install the file manager package in a Laravel application. Installation, step 1: npm i react-native-azure-blob-storage-manager --save; step 2, dependencies: npm install --save react-native-background-upload; iOS: cd ios && pod install (or follow the manual installation). Documentation. We are going to use react-dropzone to build an image uploader. Hello ex angular-filemanager user, this is the new version in React.
react file manager
diff --git a/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager.txt b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager.txt
new file mode 100644
index 0000000000000000000000000000000000000000..600b501136a12f398d7b5d1d04cd146a9e6d4d6e
--- /dev/null
+++ b/src/main/resources/cdtocode/doc/Apache OODT File Manager/React file manager.txt
@@ -0,0 +1 @@
+react file manager
All components included in this dashboard template have been developed to bring all the potential of HTML5 and Bootstrap, plus a set of new features (JS and CSS), ideal for your next dashboard admin theme or admin web application project. It should be extensible, and usable from external projects as well. Description: Redux helps you write applications that behave consistently, run in different environments (client, server, and native), and are easy to test. It has a beautiful design, as you can see from the live previews, and it contains a LOT of components and features. Add a Start script to package.json.
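A sketch of that download-and-open flow, using expo-file-system's downloadAsync (the package is mentioned earlier in this text) and react-native-file-viewer. The URL and file name are hypothetical, and on some platforms the file:// prefix of the returned URI may need stripping before handing it to the viewer:

    import * as FileSystem from 'expo-file-system';
    import FileViewer from 'react-native-file-viewer';

    async function downloadAndOpen() {
      // download the remote file's content to a local file on the device
      const { uri } = await FileSystem.downloadAsync(
        'https://example.com/files/report.pdf',     // hypothetical remote file
        FileSystem.documentDirectory + 'report.pdf' // local destination
      );

      // pass the local URL to the native iOS/Android viewer
      await FileViewer.open(uri);
    }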
Inbuilt search textbox in the FileManager. See also: Overview of the Kendo UI FileManager; Sort in the Kendo UI FileManager; Toolbar Commands in the Kendo UI FileManager. Module files are represented in the design manager in a multi-pane module editor. [4.1] - 28 November 2020: upgraded Bootstrap version to 4.x; updated Laravel 7 across the full version and starter kit; React fixes. Filemanager with React & Nodejs. The download manager handles HTTP connections, monitors connectivity changes, reboots, and ensures each download completes successfully. I would like this shortcode to be dynamic, i.e. if I add or remove files in File Manager, it will react dynamically on the front side (so I don't need to modify the shortcode). Grab the demo from Github if you haven't done this yet. In this tutorial we will use a package named @react-native-community/checkbox to add checkboxes in React Native. You can customize Storybook's webpack setup by providing a webpackFinal field in .storybook/main.js. Install the React components and choose a theme that suits your needs. 2 - Ability to translate wireframes and PSD designs into functional web apps using HTML5, React, Node.js, Express and TypeScript. In this tutorial you will learn how to create a working file upload component with React from scratch, using no dependencies other than React itself. You can rearrange the order of your files by dragging them around to move the important files to the top of the list for faster access. Run the Drupal page having React. Nested modals aren't supported, but if you really need them, the underlying react-overlays can support them if you're willing. Download the corresponding App Center SDK for iOS frameworks provided as a zip file and unzip it. The control is part of the Telerik UI for ASP.NET Core suite along with 100+ fully-featured UI components designed to speed up delivery and improve every aspect of your application. You'll see a plus symbol to the left of the file or folder. An Electron-based file manager. The file manager application is like the heart of a smartphone. Modals "trap" focus in them, ensuring that keyboard navigation cycles through the modal and not the rest of the page. Async uploading with AJAX, or encode files as base64 data and send them along with a form post. I am a beginner in React. Developed with the latest jQuery plugins. Go to react_code\src and change the apiUrl inside config.js as per the current URL of your Drupal site. Say "EDIT MODE". To enable profiling in production mode, modify the Webpack configuration file (config/webpack.prod.js). To include the File Manager component in your application, import the FileManagerComponent from the ej2-react-filemanager package in App.tsx, then add the File Manager component as shown in the code example below.
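A minimal sketch of that Syncfusion component; the package is published as @syncfusion/ej2-react-filemanager, allowDragAndDrop is the drag-and-drop flag discussed earlier, and the ajaxSettings URLs below point at a hypothetical backend file-operations service:

    import * as React from 'react';
    import { FileManagerComponent } from '@syncfusion/ej2-react-filemanager';

    export default function App() {
      return (
        <FileManagerComponent
          allowDragAndDrop={true}
          ajaxSettings={{
            url: 'https://example.com/api/FileManager/FileOperations',
            uploadUrl: 'https://example.com/api/FileManager/Upload',
            downloadUrl: 'https://example.com/api/FileManager/Download',
            getImageUrl: 'https://example.com/api/FileManager/GetImage'
          }}
        />
      );
    }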
TIP: if you have never seen a dot file (a file starting with a dot), it might be odd at first, because that file might not appear in your file manager, as it's a hidden file. Unlike vanilla Bootstrap, autoFocus works in Modals, because React handles the implementation. Filedash – File Manager Dashboard. Maybe later I can have a button to switch to the grid view. File-Manager development specification, introduction: this document is not meant as a handover document; it is a development specification written to help you understand the finished product. Say, for instance, that you want to open the file select dialogue for a user to select a file to upload. This sample demonstrates how to utilize the Amazon S3 file system provider to manage the files in the File Manager component. Deploy Trillo File Manager from the GCP Marketplace. Choose a device definition; Nexus 5X is suggested. Create a new project with React Native. React also allows us to create reusable UI components. bs4 File Manager v6. Also, you might want to customize the look of the file input in the form to make it resonate with your overall app design. Chocolatey is trusted by businesses to manage software deployments. Conclusion: let's work that out. A profile-image upload helper from one of the quoted tutorials begins as follows:

    import React, { useEffect, useRef, useState } from 'react';
    // Specify camera icon to replace button text
    import camera from './assets/images/camera.svg'; // replace it with your path
    // Specify your default image
    import defaultUser from './assets/images/defaultUser.svg'; // replace it with your path

    // Profile upload helper
    const HandleImageUpload = () => {
      // we are referencing the file input
      const imageRef = useRef();
      // Specify the default image
      const [defaultUserImage
      // snippet truncated in the source

- Added new Scrollable layout. FTP access: upload files via FTP when you need an easier and faster way to upload and download. The default locale of the file manager is en (English). In our editor / file manager we should see a .nyc_output and a coverage folder containing our instrumentation detail. It is distributed through NPM under the kendo-react-upload package. It was initially called Filer but was changed to Thunar due to a name clash. It can be used as a standalone app or as middleware. I will try to make it clean and retro-compatible with the previous bridges/connectors. When it comes to both of these issues, React can help you provide a better user experience. Themes and skinning: jQuery, Angular, React, Vue. React Data Grid: you can add spans to any grid element, fine-tune the table sizes, specify the columns' auto width, and freeze one or more columns. Select, Copy, Paste, and Delete. Drop files, select them on the filesystem, copy and paste files, or add files using the API. And in our opinion, the Webix library offers the best solution available on the market. Free React Design System for Bootstrap 4 (reactstrap). Adding the React File Manager for the PDF library: in the previous section, we added the File Server Node API component from filemanager (by OpusCapita). Folder-based file browser given a flat keyed list of objects, powered by React. Client React connector for Google Drive API v2; detailed documentation for each package is coming soon. You can fire up the project with dotnet run to see what the scaffold does for you. react-dropzone is React's implementation of a popular drag-and-drop library for file uploading.
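Its current hook-based API (useDropzone) renders a click-or-drop zone in a few lines; this sketch passes accepted files to a callback prop of your choosing:

    import React, { useCallback } from 'react';
    import { useDropzone } from 'react-dropzone';

    export default function ImageDropzone({ onFiles }) {
      // called with the accepted File objects after a drop or file selection
      const onDrop = useCallback(files => onFiles(files), [onFiles]);
      const { getRootProps, getInputProps, isDragActive } = useDropzone({ onDrop });

      return (
        <div {...getRootProps()}>
          <input {...getInputProps()} />
          <p>{isDragActive ? 'Drop the images here' : 'Drag & drop images, or click to select'}</p>
        </div>
      );
    }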
It is fully responsive, built with the Bootstrap 4 framework, HTML5, CSS3 and SCSS. The editor is the whole CodeSandbox application (file manager, code editor, dependency settings) and the preview is the result you see on the right. Extension for Visual Studio Code: simple extensions for React, Redux and GraphQL in JS/TS with ES7 syntax. Another file format that uses the MD file extension is Moneydance Financial Data; newer versions of the program use .MONEYDANCE files instead. - Added Blog List, Blog Grid, Blog Details pages. Site navigation and layout. React Native: this is an exact mirror of the React Native project. A lightweight and easy-to-use password manager. Clonezilla: a partition and disk imaging/cloning program. All the operating systems have got a file manager to filter the required files. Create React App: how to create and deploy a React application to production, and we can drill down into various modules. To do so, right-click the libman.json file and choose "Enable Restore on Build". Semantic UI React provides React components while Semantic UI provides themes as CSS stylesheets. This is one of the admin tools through which our customers manage their static files on a shared host. The FileBrowser dialogs consist of a FileBrowser object, an object previewer/property manager and a file uploader tab. Let's install Bootstrap and React. Managing your React.js app on cPanel. You can delete the files from the My Media, My Documents or My Photos folders. This can be done in one of two ways: run bower install --save for each package (the --save flag adds the dependencies, name and version, to the bower.json file). The file is called "FirstReactApp.jsx": select Web => JSX File, and enter the file name "FirstReactApp.jsx". Adds React debugging tools to the Chrome Developer Tools. This is a Sample React Plugin for the Apache OODT File Manager. Thunar is designed to start up faster and be more responsive than some other Linux file managers, such as Nautilus and Konqueror. This is an example file with default selections. Executive summary: these course materials were originally designed for Google managers to help them transition from an individual contributor role to a manager role. In XCode, in the project navigator, right-click Libraries => Add Files to [your project's name], go to node_modules/react-native-file-manager and add the .xcodeproj file. You can go for either an HTML5 drag & drop file uploader or use the traditional way. It allows the creation of multiple users, and each user can have its own directory. ReactOS is a free, open-source reimplementation of Windows. Related: how to copy and paste text, files and folders in the Linux terminal. To make the functions work as expected, I transpile these into CommonJS format in addition to transpiling React JSX files; I sincerely hope this helps. npm is a command-line utility connected with the corresponding online repository of packages, and is capable of package installation, version management, and dependency management. Step 9: configuring the AVD Manager. Trillo File Manager is an application for Dropbox-like functionality on top of the GCS cloud storage bucket. 3 - Binding of UI elements to JavaScript object models. React is one of the best choices for building modern web applications. Default configuration. However, you don't want to use the standard file input HTML element; instead, use a styled link or button to show the file window.
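A common React pattern for that: keep the real input hidden and forward clicks from a styled button through a ref. This is a minimal sketch; the class name and the callback prop are arbitrary:

    import React, { useRef } from 'react';

    export default function UploadButton({ onFile }) {
      const inputRef = useRef(null);

      return (
        <>
          {/* the real input stays hidden; the styled button opens the file window */}
          <input
            type="file"
            ref={inputRef}
            style={{ display: 'none' }}
            onChange={e => onFile(e.target.files[0])}
          />
          <button className="styled-upload" onClick={() => inputRef.current.click()}>
            Choose a file
          </button>
        </>
      );
    }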
Reference React.Web.Mvc4.dll (if using MVC 4) in your Web Application project. Your first build always needs to be done using the build script (dev-build.bat), as this generates a few files required by the build (such as SharedAssemblyVersionInfo.cs). Wrap long file names in the File Manager's detail view; customize icons in the folder tree. The "React JS Developer" role at one.com lands you with the opportunity of working with a leading technology organization. Install react-file-reader (a flexible ReactJS component for handling styled HTML file inputs). Unfortunately it can be quite intimidating. React is a popular open-source JavaScript library, and many of you asked for an easier integration between Power BI and React web applications. Free bootstrap snippets, examples and resources tagged with file-manager, HTML, CSS and JS. The file manager component is used to browse, manage, and organize the files and folders in a file system through a web application. A lot of people name React components with a capital letter in the file, to distinguish them from regular JavaScript files. When viewing a module locally, the files are contained within module-name.module folders. Input: a file input management component for React. WP Media Folder v5. Upload a React website to a subdomain: open File Manager, create a new folder inside "public_html", and upload the whole content of the "build" folder into this newly created folder. Restore on demand: Library Manager will restore client-side libraries whenever the libman.json file is saved. In this tutorial we are going to create a task manager application from scratch with React. The DevExtreme JavaScript FileManager component allows you to display and manage files and directories for different file systems. How can I create a custom command for the Kendo UI File Manager? Creating a custom command:
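The Kendo UI answer (in its jQuery flavor) is to extend FileManagerCommand inside the kendo.ui.filemanager namespace mentioned above. This is a sketch close to the documented pattern; the command name and body are illustrative:

    var filemanagerNS = kendo.ui.filemanager;

    filemanagerNS.commands.MyCustomCommand = filemanagerNS.FileManagerCommand.extend({
      exec: function () {
        var filemanager = this.filemanager;       // the FileManager instance
        var selected = filemanager.getSelected(); // currently selected files
        console.log('Custom command executed for', selected);
      }
    });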
React has a slim API, a robust and evolving ecosystem and a great community. After downloading its installation file, double-click on it and proceed with the installation. The KendoReact Upload component is part of the KendoReact library of React UI components. The new React component supports both JavaScript and TypeScript and will help you embed your analytics in a React web application. Our file caching system will have two main parts. The first is a React component, which will wrap around RNFetchBlob's functionality and respond to changes in the Redux store. The second is a set of actions and reducers on the Redux store which deal specifically with file caching. Tailor fman to your needs with its powerful plugin system. The MD file stores transactions, budgets, stock information, bank accounts, and other related data for the Moneydance finance software.
Data List; React DataTable component. A Vue-based front-end for File Manager (Aug 01, 2018). Benefits of hosting React.js on cPanel. Add events to precisely control file/folder operations (folder creation, file uploading, moving, deleting, etc.). Go through the following steps for creating a React project to download a file from the server using React. A full list of the compatible frameworks and integration examples can be found on this page. Files and folders in the file system can be sorted in either ascending or descending order. npm install --save @opuscapita/react-filemanager @opuscapita/react-filemanager-connector-node-v1. import ReactFileReader from 'react-file-reader';. Because the other files and folders above (some truncated) are usually part of a default react-native init installation, our focus would be on the src folder. To configure the AVD Manager, click on the respective icon in the menu bar. These have been selected by the most active users, and ranking has been given based on the most popular votes. Accessible: tested with AT software like VoiceOver and JAWS, navigable by keyboard. Install from NPM and include it in your own React build process (using Browserify, Webpack, etc.). The project is about uploading a user's products/services. The name npm (Node Package Manager) stems from when npm first was created as a package manager for Node.js. If multiple packages depend on a package, jQuery for example, Bower will download jQuery just once. Communicating React with ASP.NET Core on a remote server (httpdocs folder) does not work. Vuexy - Vuejs, React, HTML & Laravel Admin Dashboard Template, update [6.0] – 2020-11-28, HTML and HTML + Laravel: all-new design based on UI/UX principles, new Bordered and Dark layouts, new eCommerce dashboard and invoice pages. Bamburgh React Admin Dashboard with Reactstrap PRO is built entirely on React and uses the popular starter kit Create React App from Facebook. Now my app only shows that green circle button located in index.html; the rest of the website built in React shows nothing (a white page). Just be sure to follow the installation instructions for "bare" or plain react-native apps. ReactOS will only be compatible with computers that are compatible with Windows 2003 or XP. Any FOSS lover is warmly welcomed. Create React App - TS docs; Next.js - TS docs; Gatsby - TS docs: all of these are great starting points. Finally, what all this was leading up to: opening that index.html file should reveal our code coverage in a human-readable and hopefully revealing way.
React Filemanager Hello ex angular-filemanager user, this is the new version in React. The application provides an unified, natively portable, cross-platform file manager and archive manager GUI for many Open Source technologies like 7-Zip, FreeArc, PAQ, UPX. But first, here are the benefits of hosting your React. The ASP. It is developed by laborasyon on ThemeForest. In this tutorial, we will upload an image from the react js component. 80/5. React is an open-source JavaScript library developed by Facebook used for creating web frontend and UI components. 14. File Manager and Core Data: Used to save photo, video, audio, and pdf data to the ios device url sessions: Used to communicated with the server to upload the data to the Utah State Geographical Cuba admin is super flexible, powerful, clean & modern responsive bootstrap 5 admin template with unlimited possibilities. Or if you have the optional Yarn package manager installed. React & JavaScript articles. The File Manager component supports multiple selections of files and folders in a file system. Storybook - GitHub Pages angular-filemanager. It is worth noting the beautiful design, and a ready-made set of icons, which are included in the delivery. 10. Work fast with our official CLI. Beside Material-UI, we also integrated, with the same design style, over 80 React widgets and plugins. Click on the Next button you will see a System React Fixed. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. NullPointerException; TypeError: string indices must be integers – Python; valueerror: setting an array element with a sequence – Python; TypeError: a bytes-like object is required, not ‘str’ – Python Drop files, select on filesystem, copy and paste files, or add files using the API. To start the app server it will display live changes (optional) 4. 그럼 스타뜨! 배경 DCE내에서 파일 업로드 및 관리를 할수 있는 GUI 화면이 필요했다. React Scheduler Disclaimer: The information provided on DevExpress. The issue with this is that, because we’re using create-react-app, we can’t configure a lot of things unless we eject. This package support multiple files selection, cloud storage integration. Creating a file upload component is a common task in web development. Developer Express Inc disclaims all warranties, either express or implied, including the warranties of merchantability and fitness for a particular purpose. At least two fields must be present in the definition file: name and version. 0] – 2020-11-28 HTML, HTML + Laravel ADDED All-new design based on Ul/UX principles New Bordered & Dark layout New eCommerce Dashboard Invoice Bamburgh React Admin Dashboard with Reactstrap PRO is built entirely on React and uses the popular starter kit Create React App from Facebook. Now my APP only show that green circle button located in index. e. Just be sure to follow the installation instructions for “bare” or plain react-native apps. json file. Build files will be created build. First, we install dependencies using npx then download the laravel project. 4. Download Epic React – HR Management Admin Template nulled from the below download links and if the item satisfy you then buy it from the developer puffintheme for commercial use. - Minor fixes of RTL SCSS. npm install --save react npm install --save react-dom npm install --save-dev parcel-bundler. js - TS docs; Gatsby - TS Docs; All of these are great starting points. Finally, what all this was leading up to, opening that index. 
I guess it's technically possible to write a file manager in Node, use React for the UI, and package it as a desktop app with Electron, but I would still not call that "React based" (and C. js in your Greg Fodor - Engineering Manager Mozilla The development team involved have been very impressed by the React Admin framework and it has been capable of handling the complex challenges we have had for it thusfar. It uses React framework and supports connectors to different file storages. Drag & Drop your files in folders: Drag & Drop and image to move it into a folder, where you can find other files. File Manager: Admin can import/export & upload new files. packages. Simple event handlers are also provided as props to the browser, which allow it to respond to actions on the files. Thus you will get an example of integration usage. onChangeHandler=event=>{ console. . It has a large UI collection. dll. onChangeHandler=event=>{ console. import React from 'react'; import ReactDOM from 'react-dom'; import { FileManager, FileNavigator } from '@opuscapita/react-filemanager'; import connectorNodeV1 from '@opuscapita/react-filemanager-connector-node-v1'; const apiOptions = { connectorNodeV1. json file is saved. API-first CMS. json file and choose “Enable Restore on Build”. FileManager also performs operations like creating a new folder, moving files, and searching. This control is part of the Telerik UI for ASP. Created from revision f160547f47 on 12/4/2020. All APIs that implement access to Azure Blob Storage on the client are stored in the azure-file-system. banzay/friends-app-redux Second take on friends app. 4 - Creating RESTful services with Package Manager stores application information in three files, located in /data/system. Is the Excel Viewer widget compatible with the Webix community (opensource) edition? PHP & MySQL Projects for $2 - $10. rtl8761a_mp_chip_bt40_fw_asic_rom_patch_8192eu_new. d. Select the file to upload from the file selector dialog box; Downloading a file. pdf. NET Core FileManager lets your users browse through directories and files, akin to file managers like Windows Explorer, and manage file storage within their web applications. spatie/laravel-medialibrary Released: August 2015 Installs: 178 000 Last update: May 2017 (1 day ago). Note the command dotnet new react; this is the template I’m using for this React project. Your domain will look in this ‘public_html’ folder for a top ‘index. You'll see a folder named AppCenterReactNativeShared which contains a single framework for the required React Native iOS bridge. Be it a web-based gaming experience where you store state information in the Table service, uploading photos to a Blob account from a Mobile app, or an entire CodeSandbox at its core consists of two parts: the editor and the preview. Predefined connectors are: Client React connector for Server Node API v1 Localization in React FileManager component The file manager can be localized to any culture by defining the texts and messages of the file manager in the corresponding culture. babelrc file present in the application root folder. jpeg extension) Uploading an image where the file extension has been intentionally changed and Cloudinary could process it, but the DOM could not render the file (eg. Web based File Manager Manage files online From within the free control panel, an easy to use File Manager helps you to upload files, download files or even edit HTML, PHP or other programming language files. 2. The content of package. Create a new file called manager. 
All basic file operations like creating a new folder, uploading and downloading of files in the file system, and deleting and renaming of existing files and folders are available in the file manager component. Used technologies. askwon/Filet-Manager Web-based file transfer client written in React, Redux, and Go; ayxos/react-cellar Typescript, MongoDb, Webpack, EC6, Typings, Redux Wine-Cellar; azu/read-all-later [Electron] Read All Later is a client for Pocket. - Added New Auth pages. To run the service, create an Amazon S3 account and a S3 bucket and then register your amazon S3 client account details like bucketName, awsAccessKeyId, awsSecretKeyId and awsRegion details in RegisterAmazonS3 method to perform the file operations. 9. 5. Angle is an admin template based on Bootstrap and multiple frameworks. Personalize your React grid with flexible API. Organizing your blog media files with the Real Media Library plugin is as easy as dragging and dropping them into folders. JavaScript File Manager or in other words File Explorer is a web widget, part of the UI framework for managing files. Developed at Facebook and released to the world in 2013, it drives some of the most widely used apps, powering Facebook and Instagram among countless other applications. light. . Here native file viewer means we are not going to view the file in our application instead we will pick the file from file picker and will pass the file URL to FileViewer component provided by react-native-file-viewer, this component will trigger the native iOS/Android file viewer to open the file. Say “MORE OPTIONS” 3. ). To delete one or more files, 1. To download a remote file’s content to a local file on the device, here’s the code: Hi Dev, In this blog, I will show you how to install file manager package in laravel application. Install Step 1 npm i react-native-azure-blob-storage-manager --save Step 2 Dependencies npm install --save react-native-background-upload iOS cd ios pod install Manual Installation Installation. Node. Use Git or checkout with SVN using the web URL. Page 16. Documentation. We are going to use react-dropzone to build an image uploader. Hello ex angular-filemanager user, this is the new version in React. react file manager diff --git "a/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-relation.txt" "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-relation.txt" new file mode 100644 index 0000000000000000000000000000000000000000..fbb01d68e919c4da23d6cedfdae91f79d032c64b --- /dev/null +++ "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-relation.txt" @@ -0,0 +1,100 @@ +Introduction +This is the developer guide for the Apache OODT Catalog and Archive Service (CAS) File Manager component, or File Manager for short. Primarily, this guide will explain the File Manager architecture and interfaces, including its tailorable extension points. For information on installation, configuration, and examples, please see our User Guides. + +The remainder of this guide is separated into the following sections: + +Project Description +Architecture +Extension Points +Current Extension Point Implementations +Project Description +The File Manager component is responsible for tracking, ingesting and moving file data and metadata between a client system and a server system. 
The File Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for file management. + +Architecture +In this section, we will describe the architecture of the File Manager, including its constituent components, object model, and key capabilities. + +Components +The major components of the File Manager are the Client and Server, the Repository Manager, the Catalog, the Validation Layer, the Versioner, and the Transferer. The relationship between all of these components is shown in the diagram below: + +File Manager Architecture + +The File Manager Server contains both a Repository that manages products (and the products' location in the archive as specified by the Versioner), and a Catalog that validates metadata via the Validation Layer. Transfer of data products from the Client to the Server is the domain of the Transferer and can be initiated at either the Client or the Server. + +Object Model +The critical objects managed by the File Manager include: + +Products - Collections of one or more files, and their associated Metadata. +Metadata - A map of key->multiple values of descriptive information about a Product. See CAS-Metadata for more information on Metadata. +Reference - A pointer to a Product file's (or files') original location, and to its final resting location within the archive constructed by the File Manager. +Product Type - Descriptive information about a Product that includes what type of file URI generation scheme to use, the root repository location for a particular Product, and a description of the Product. +Element - A singular Metadata element, such as "Author", or "Creator". Elements may have additional metadata, in the form of the associated definition and even a corresponding Dublin Core attribute. See CAS-Metadata for more information on Metadata Elements. +Versioner - A URI generation scheme for Product Types that defines the location within the archive (built by the File Manager) where a file belonging to a Product (that belongs to the associated Product Type) should be placed. +Each Product contains one or more References, and one Metadata object. Each Product is a member of a single Product Type. The Metadata collected for each Product is defined by a mapping of Product Type->1...* Elements. Each Product Type has an associated Versioner. These relationships are shown in the figure below. + +File Manager Object Model
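As a minimal Java sketch of the relationships just described (the class and field names follow the prose above rather than the actual org.apache.oodt.cas.filemgr.structs API, so treat them as illustrative):

import java.util.List;
import java.util.Map;

// A pointer to a file's original location and its final resting location.
class Reference {
    String origReference;      // original file location
    String dataStoreReference; // final location within the archive
}

// Policy-level description of a kind of Product.
class ProductType {
    String id;
    String name;
    String description;
    String repositoryPath;  // root repository location for this type
    String versionerClass;  // URI generation scheme used for this type
}

// A Product: one or more References, exactly one Metadata map, one type.
class Product {
    ProductType productType;             // member of a single Product Type
    List<Reference> references;          // 1..* References
    Map<String, List<String>> metadata;  // key -> multiple values
}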
+Key Capabilities +The File Manager has been designed with a number of key capabilities in mind. These capabilities include: + +Easy management of different types of Products. The Repository Manager extension point is responsible for managing Product Types, and their associated information. Management of Product Types includes adding new types, deleting and updating existing types, and retrieving Product Type Objects, by their ID or by their name. + +Support for different kinds of back end catalogs. The Catalog extension point allows Product instance metadata and file location information to be stored in different types of back end data stores quite easily. Existing implementations of the Catalog interface include a JDBC based back end database, along with a flat-file index powered by Lucene. + +Management of Product instance information. Management includes adding, deleting and updating product instance information, including file locations (References), along with Product Metadata. It also includes retrieving Metadata and References associated with existing Products as well as obtaining the Products themselves. + +Element management for Metadata. The File Manager's Validation Layer extension point allows for the management of Element policy information in different types of back end stores. For instance, Element policy could be stored in XML files, a Database, or a Metadata Registry. + +Data transfer mechanism interface. By having an extension point for Data Transfer, the File Manager can support different Data Transfer protocols, both local and remote. + +Advanced support for File Repository layouts. The Versioner extension point allows for different File Repository layouts based on Product Types. + +Support for multiple Product structures. The File Manager Client allows for Products to be Flat, or Hierarchical-based. Flat products are collections of singular files that are aggregated together to make a Product. Hierarchical Products are Products that contain collections of directories, and sub-directories, and files. + +Design for scalability. The File Manager uses the popular client-server paradigm, allowing new File Manager servers to be instantiated, as needed, without affecting the File Manager clients, and vice-versa. + +Standard communication protocols. The File Manager uses XML-RPC as its main external interface between the File Manager client and server. XML-RPC, the little brother of SOAP, is fast, extensible, and uses the underlying HTTP protocol for data transfer. + +RSS-based Product syndication. The File Manager web interface allows for the RSS-based syndication of Product feeds based on Product Type. + +Data transfer status tracking. The File Manager tracks all current Product and File transfers and even publishes an RSS-feed of existing transfers. + +This capability set is not exhaustive, and is meant to give the user a feel for what general features are provided by the File Manager. Most likely the user will find that the File Manager provides many other capabilities besides those described here. + +Extension Points +We have constructed the File Manager making use of the factory method pattern to provide multiple extension points for the File Manager. An extension point is an interface within the File Manager that can have many implementations. This is particularly useful when it comes to software component configuration because it allows different implementations of an existing interface to be selected at deployment time. + +The factory method pattern is a creational pattern common to object oriented design. Each File Manager extension point involves the implementation of two interfaces: an extension factory and an extension implementation. At run-time, the File Manager loads a properties file that specifies a factory class to use during extension point instantiation. For example, the File Manager may communicate with a database-based Catalog and an XML-based Element Store (called a Validation Layer), or it may use a Lucene-based Catalog and a database-based Validation Layer.
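As a rough illustration of how such a properties-driven factory lookup can be wired up (the property key, interface shapes, and file layout here are assumptions for illustration, not the File Manager's actual source):

import java.io.FileInputStream;
import java.util.Properties;

interface Catalog { /* Product instance data storage and query */ }

interface CatalogFactory {
    Catalog createCatalog();
}

public class ExtensionPointLoader {
    // Reads e.g. "filemgr.catalog.factory=com.example.LuceneCatalogFactory"
    // from the properties file and reflectively instantiates that factory.
    public static Catalog loadCatalog(String propertiesPath) throws Exception {
        Properties props = new Properties();
        try (FileInputStream in = new FileInputStream(propertiesPath)) {
            props.load(in);
        }
        String factoryClass = props.getProperty("filemgr.catalog.factory");
        CatalogFactory factory = (CatalogFactory) Class
                .forName(factoryClass)
                .getDeclaredConstructor()
                .newInstance();
        return factory.createCatalog();
    }
}

Because only the factory class name lives in configuration, swapping the Lucene-based Catalog for the database-based one is a one-line properties change, with no recompilation.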
+Using extension points, it is fairly simple to support many different types of what are typically referred to as "plug-in architectures." Each of the core extension points for the File Manager is described below: + +Catalog The Catalog extension point is responsible for storing all the instance data for Products, Metadata, and for file References. Additionally, the Catalog provides a query capability for Products. +Data Transfer The Data Transfer extension point allows for the movement of a Product to and from the archive managed by the File Manager component. Different protocols for Data Transfer may include local (disk-based) copy, or remote XML-RPC based transfer across networked machines. +Repository Manager The Repository Manager extension point provides a means for managing all of the policy information (i.e., the Product Types and their associated information) for Products managed by the File Manager. +Validation Layer The Validation Layer extension point allows for the querying of element definitions associated with a particular Product Type. The extension point also maps Product Type to Elements. +Versioning The Versioning extension point allows for the definition of different URI generation schemes that define the final resting location of files for a particular Product. +System The extension point that provides the external interface to the File Manager services. This includes the File Manager server interface, as well as the associated File Manager client interface, that communicates with the server. +Current Extension Point Implementations +There are at least two implementations of all of the aforementioned extension points for the File Manager. Each extension point implementation is detailed in this section. + +Catalog +Data Source based Catalog. An implementation of the Catalog extension point interface that uses a JDBC accessible database backend. +Lucene based Catalog. An implementation of the Catalog extension point interface that uses the Lucene free text index system to store Product instance information. +Data Transfer +Local Data Transfer. An implementation of the Data Transfer interface that uses Apache's commons-io to perform local, disk based filesystem data transfer. This implementation also supports locally accessible Network File System (NFS) disks. +Remote Data Transfer. An implementation of the Data Transfer interface that uses the XML-RPC File Manager client to transfer files to a remote XML-RPC File Manager server. +InPlace Data Transfer. An implementation of the Data Transfer interface that avoids transferring any products -- this can be used in the situation where metadata about a particular product should be recorded, but no physical transfer needs to occur. +Repository Manager +Data Source based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in a JDBC accessible database. +XML based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in an XML file called product-types.xml +Validation Layer +Data Source based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in a JDBC accessible database. +XML based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in two XML files called elements.xml and product-type-element-map.xml +System (File Manager client and File Manager server) +XML-RPC based File Manager server. An implementation of the external server interface for the File Manager that uses XML-RPC as the transportation medium. +XML-RPC based File Manager client. An implementation of the client interface for the XML-RPC File Manager server that uses XML-RPC as the transportation medium.
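Assuming a filemgr.properties laid out along the lines implied above (the key names below follow a filemgr.<extension>.factory convention and the factory class names are reconstructed from the implementation names in this section; verify both against your distribution), a deployment selects one implementation per extension point:

# Illustrative filemgr.properties fragment, not a verified configuration
filemgr.catalog.factory=org.apache.oodt.cas.filemgr.catalog.LuceneCatalogFactory
filemgr.repository.factory=org.apache.oodt.cas.filemgr.repository.XMLRepositoryManagerFactory
filemgr.datatransfer.factory=org.apache.oodt.cas.filemgr.datatransfer.LocalDataTransferFactory
filemgr.validationLayer.factory=org.apache.oodt.cas.filemgr.validation.XMLValidationLayerFactory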
+Use Cases +The File Manager was built to support several of the capabilities outlined in Section 3 above. In particular there were several use cases that we wanted to support, some of which are described below. + +File Manager Ingest Use Case +The red numbers in the above Figure correspond to a sequence of steps, and a series of interactions between the different File Manager extension points, that occur in order to perform the file ingestion activity. In Step 1, a File Manager client is invoked for the ingest operation, which sends Metadata and References for a particular Product to ingest to the File Manager server’s System Interface extension point. The System Interface uses the information about Product Type policy made available by the Repository Manager in order to understand whether or not the product should be transferred, where its root repository path should be, and so on. The System Interface then catalogs the file References and Metadata using the Catalog extension point. During this catalog process, the Catalog extension point uses the Validation Layer to determine which Elements should be extracted for the particular Product, based upon its Product Type. After that, Data Transfer is initiated either at the client or server end, and the first step of Data Transfer is using the Product’s associated Versioner to generate final file References. After final file References have been determined, the file data is transferred by the server or by the client, using the Data Transfer extension point.
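A client-side view of this sequence might look like the following sketch, reusing the Product class from the object-model example above; the FileManagerClient type is a stand-in for the XML-RPC client described in this guide, and the real class and method signatures in org.apache.oodt.cas.filemgr differ in detail:

import java.net.URL;
import java.util.List;
import java.util.Map;

// Stand-in for the XML-RPC File Manager client interface (Step 1).
interface FileManagerClient {
    // Sends Metadata and References for a Product to the server's
    // System Interface; returns the new Product's identifier.
    String ingestProduct(Product product,
                         Map<String, List<String>> metadata,
                         boolean clientTransfer) throws Exception;
}

public class IngestSketch {
    public static void main(String[] args) throws Exception {
        FileManagerClient client = connect(new URL("http://localhost:9000"));
        Product product = new Product(); // carries the file References
        Map<String, List<String>> metadata =
                Map.of("Author", List.of("CAS File Manager"));
        // Policy lookup, cataloging, validation, versioning, and the
        // actual Data Transfer all happen once this call reaches the
        // server's System Interface extension point (Steps 2 onward).
        String productId = client.ingestProduct(product, metadata, true);
        System.out.println("Ingested product " + productId);
    }

    private static FileManagerClient connect(URL serverUrl) {
        // In a real deployment this would construct the XML-RPC client
        // pointed at the File Manager server URL.
        throw new UnsupportedOperationException("stand-in for illustration");
    }
}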
+ +Conclusion +The aim of this document is to provide information relevant to developers about the CAS File Manager. Specifically, this document has described the File Manager's architecture, including its constituent components, object model and key capabilities. Additionally, this document provides an overview of the current implementations of the File Manager's extension points. \ No newline at end of file diff --git "a/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-simEnts.txt" "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-simEnts.txt" new file mode 100644 index 0000000000000000000000000000000000000000..fbb01d68e919c4da23d6cedfdae91f79d032c64b --- /dev/null +++ "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide-simEnts.txt" @@ -0,0 +1,100 @@ +Introduction +This is the developer guide for the Apache OODT Catalog and Archive Service (CAS) File Manager component, or File Manager for short. Primarily, this guide will explain the File Manager architecture and interfaces, including its tailorable extension points. For information on installation, configuration, and examples, please see our User Guides. + +The remainder of this guide is separated into the following sections: + +Project Description +Architecture +Extension Points +Current Extension Point Implementations +Project Description +The File Manager component is responsible for tracking, ingesting and moving file data and metadata between a client system and a server system. The File Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for file management. + +Architecture +In this section, we will describe the architecture of the File Manager, including its constituent components, object model, and key capabilities. + +Components +The major components of the File Manager are the Client and Server, the Repository Manager, the Catalog, the Validation Layer, the Versioner, and the Transferer. The relationship between all of these components is shown in the diagram below: + +File Manager Architecture + +The File Manager Server contains both a Repository that manages products (and the products' location in the archive as specified by the Versioner), and a Catalog that validates metadata via the Validation Layer. Transfer of data products from the Client to the Server is the domain of the Transferer and can be initiated at either the Client or the Server. + +Object Model +The critical objects managed by the File Manager include: + +Products - Collections of one or more files, and their associated Metadata. +Metadata - A map of key->multiple values of descriptive information about a Product. See CAS-Metadata for more information on Metadata. +Reference - A pointer to a Product file's (or files') original location, and to its final resting location within the archive constructed by the File Manager. +Product Type - Descriptive information about a Product that includes what type of file URI generation scheme to use, the root repository location for a particular Product, and a description of the Product. +Element - A singular Metadata element, such as "Author", or "Creator". Elements may have additional metadata, in the form of the associated definition and even a corresponding Dublin Core attribute. See CAS-Metadata for more information on Metadata Elements. +Versioner - A URI generation scheme for Product Types that defines the location within the archive (built by the File Manager) where a file belonging to a Product (that belongs to the associated Product Type) should be placed. +Each Product contains one or more References, and one Metadata object. Each Product is a member of a single Product Type. The Metadata collected for each Product is defined by a mapping of Product Type->1...* Elements. Each Product Type has an associated Versioner. These relationships are shown in the figure below. + +File Manager Object Model +Key Capabilities +The File Manager has been designed with a number of key capabilities in mind. These capabilities include: + +Easy management of different types of Products. The Repository Manager extension point is responsible for managing Product Types, and their associated information. Management of Product Types includes adding new types, deleting and updating existing types, and retrieving Product Type Objects, by their ID or by their name. + +Support for different kinds of back end catalogs. The Catalog extension point allows Product instance metadata and file location information to be stored in different types of back end data stores quite easily. Existing implementations of the Catalog interface include a JDBC based back end database, along with a flat-file index powered by Lucene. + +Management of Product instance information. Management includes adding, deleting and updating product instance information, including file locations (References), along with Product Metadata. It also includes retrieving Metadata and References associated with existing Products as well as obtaining the Products themselves. + +Element management for Metadata. The File Manager's Validation Layer extension point allows for the management of Element policy information in different types of back end stores. For instance, Element policy could be stored in XML files, a Database, or a Metadata Registry.
+ +Data transfer mechanism interface. By having an extension point for Data Transfer, the File Manager can support different Data Transfer protocols, both local and remote. + +Advanced support for File Repository layouts. The Versioner extension point allows for different File Repository layouts based on Product Types. + +Support for multiple Product structures. The File Manager Client allows for Products to be Flat, or Hierarchical-based. Flat products are collections of singular files that are aggregated together to make a Product. Hierarchical Products are Products that contain collections of directories, and sub-directories, and files. + +Design for scalability. The File Manager uses the popular client-server paradigm, allowing new File Manager servers to be instantiated, as needed, without affecting the File Manager clients, and vice-versa. + +Standard communication protocols. The File Manager uses XML-RPC as its main external interface between the File Manager client and server. XML-RPC, the little brother of SOAP, is fast, extensible, and uses the underlying HTTP protocol for data transfer. + +RSS-based Product syndication. The File Manager web interface allows for the RSS-based syndication of Product feeds based on Product Type. + +Data transfer status tracking. The File Manager tracks all current Product and File transfers and even publishes an RSS-feed of existing transfers. + +This capability set is not exhaustive, and is meant to give the user a feel for what general features are provided by the File Manager. Most likely the user will find that the File Manager provides many other capabilities besides those described here. + +Extension Points +We have constructed the File Manager making use of the factory method pattern to provide multiple extension points for the File Manager. An extension point is an interface within the File Manager that can have many implementations. This is particularly useful when it comes to software component configuration because it allows different implementations of an existing interface to be selected at deployment time. + +The factory method pattern is a creational pattern common to object oriented design. Each File Manager extension point involves the implementation of two interfaces: an extension factory and an extension implementation. At run-time, the File Manager loads a properties file that specifies a factory class to use during extension point instantiation. For example, the File Manager may communicate with a database-based Catalog and an XML-based Element Store (called a Validation Layer), or it may use a Lucene-based Catalog and a database-based Validation Layer. +Using extension points, it is fairly simple to support many different types of what are typically referred to as "plug-in architectures." Each of the core extension points for the File Manager is described below: + +Catalog The Catalog extension point is responsible for storing all the instance data for Products, Metadata, and for file References. Additionally, the Catalog provides a query capability for Products. +Data Transfer The Data Transfer extension point allows for the movement of a Product to and from the archive managed by the File Manager component. Different protocols for Data Transfer may include local (disk-based) copy, or remote XML-RPC based transfer across networked machines.
+Repository Manager The Repository Manager extension point provides a means for managing all of the policy information (i.e., the Product Types and their associated information) for Products managed by the File Manager. +Validation Layer The Validation Layer extension point allows for the querying of element definitions associated with a particular Product Type. The extension point also maps Product Type to Elements. +Versioning The Versioning extension point allows for the definition of different URI generation schemes that define the final resting location of files for a particular Product. +System The extension point that provides the external interface to the File Manager services. This includes the File Manager server interface, as well as the associated File Manager client interface, that communicates with the server. +Current Extension Point Implementations +There are at least two implementations of all of the aforementioned extension points for the File Manager. Each extension point implementation is detailed in this section. + +Catalog +Data Source based Catalog. An implementation of the Catalog extension point interface that uses a JDBC accessible database backend. +Lucene based Catalog. An implementation of the Catalog extension point interface that uses the Lucene free text index system to store Product instance information. +Data Transfer +Local Data Transfer. An implementation of the Data Transfer interface that uses Apache's commons-io to perform local, disk based filesystem data transfer. This implementation also supports locally accessible Network File System (NFS) disks. +Remote Data Transfer. An implementation of the Data Transfer interface that uses the XML-RPC File Manager client to transfer files to a remote XML-RPC File Manager server. +InPlace Data Transfer. An implementation of the Data Transfer interface that avoids transferring any products -- this can be used in the situation where metadata about a particular product should be recorded, but no physical transfer needs to occur. +Repository Manager +Data Source based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in a JDBC accessible database. +XML based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in an XML file called product-types.xml +Validation Layer +Data Source based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in a JDBC accessible database. +XML based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in two XML files called elements.xml and product-type-element-map.xml +System (File Manager client and File Manager server) +XML-RPC based File Manager server. An implementation of the external server interface for the File Manager that uses XML-RPC as the transportation medium. +XML-RPC based File Manager client. An implementation of the client interface for the XML-RPC File Manager server that uses XML-RPC as the transportation medium. +Use Cases +The File Manager was built to support several of the capabilities outlined in Section 3 above. In particular there were several use cases that we wanted to support, some of which are described below.
+ +File Manager Ingest Use Case +The red numbers in the above Figure correspond to a sequence of steps, and a series of interactions between the different File Manager extension points, that occur in order to perform the file ingestion activity. In Step 1, a File Manager client is invoked for the ingest operation, which sends Metadata and References for a particular Product to ingest to the File Manager server’s System Interface extension point. The System Interface uses the information about Product Type policy made available by the Repository Manager in order to understand whether or not the product should be transferred, where its root repository path should be, and so on. The System Interface then catalogs the file References and Metadata using the Catalog extension point. During this catalog process, the Catalog extension point uses the Validation Layer to determine which Elements should be extracted for the particular Product, based upon its Product Type. After that, Data Transfer is initiated either at the client or server end, and the first step of Data Transfer is using the Product’s associated Versioner to generate final file References. After final file References have been determined, the file data is transferred by the server or by the client, using the Data Transfer extension point. + +Conclusion +The aim of this document is to provide information relevant to developers about the CAS File Manager. Specifically, this document has described the File Manager's architecture, including its constituent components, object model and key capabilities. Additionally, this document provides an overview of the current implementations of the File Manager's extension points. \ No newline at end of file diff --git "a/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide.txt" "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide.txt" new file mode 100644 index 0000000000000000000000000000000000000000..fbb01d68e919c4da23d6cedfdae91f79d032c64b --- /dev/null +++ "b/src/main/resources/cdtocode/doc/Apache OODT File Manager/cas-filemgr \342\200\223 CAS File Manager Developer Guide.txt" @@ -0,0 +1,100 @@ +Introduction +This is the developer guide for the Apache OODT Catalog and Archive Service (CAS) File Manager component, or File Manager for short. Primarily, this guide will explain the File Manager architecture and interfaces, including its tailorable extension points. For information on installation, configuration, and examples, please see our User Guides. + +The remainder of this guide is separated into the following sections: + +Project Description +Architecture +Extension Points +Current Extension Point Implementations +Project Description +The File Manager component is responsible for tracking, ingesting and moving file data and metadata between a client system and a server system. The File Manager is an extensible software component that provides an XML-RPC external interface, and a fully tailorable Java-based API for file management. + +Architecture +In this section, we will describe the architecture of the File Manager, including its constituent components, object model, and key capabilities. + +Components +The major components of the File Manager are the Client and Server, the Repository Manager, the Catalog, the Validation Layer, the Versioner, and the Transferer.
The relationship between all of these components is shown in the diagram below: + +File Manager Architecture + +The File Manager Server contains both a Repository that manages products (and the products' location in the archive as specified by the Versioner), and a Catalog that validates metadata via the Validation Layer. Transfer of data products from the Client to the Server is the domain of the Transferer and can be initiated at either the Client or the Server. + +Object Model +The critical objects managed by the File Manager include: + +Products - Collections of one or more files, and their associated Metadata. +Metadata - A map of key->multiple values of descriptive information about a Product. See CAS-Metadata for more information on Metadata. +Reference - A pointer to a Product file's (or files') original location, and to its final resting location within the archive constructed by the File Manager. +Product Type - Descriptive information about a Product that includes what type of file URI generation scheme to use, the root repository location for a particular Product, and a description of the Product. +Element - A singular Metadata element, such as "Author", or "Creator". Elements may have additional metadata, in the form of the associated definition and even a corresponding Dublin Core attribute. See CAS-Metadata for more information on Metadata Elements. +Versioner - A URI generation scheme for Product Types that defines the location within the archive (built by the File Manager) where a file belonging to a Product (that belongs to the associated Product Type) should be placed. +Each Product contains one or more References, and one Metadata object. Each Product is a member of a single Product Type. The Metadata collected for each Product is defined by a mapping of Product Type->1...* Elements. Each Product Type has an associated Versioner. These relationships are shown in the figure below. + +File Manager Object Model +Key Capabilities +The File Manager has been designed with a number of key capabilities in mind. These capabilities include: + +Easy management of different types of Products. The Repository Manager extension point is responsible for managing Product Types, and their associated information. Management of Product Types includes adding new types, deleting and updating existing types, and retrieving Product Type Objects, by their ID or by their name. + +Support for different kinds of back end catalogs. The Catalog extension point allows Product instance metadata and file location information to be stored in different types of back end data stores quite easily. Existing implementations of the Catalog interface include a JDBC based back end database, along with a flat-file index powered by Lucene. + +Management of Product instance information. Management includes adding, deleting and updating product instance information, including file locations (References), along with Product Metadata. It also includes retrieving Metadata and References associated with existing Products as well as obtaining the Products themselves. + +Element management for Metadata. The File Manager's Validation Layer extension point allows for the management of Element policy information in different types of back end stores. For instance, Element policy could be stored in XML files, a Database, or a Metadata Registry. + +Data transfer mechanism interface. By having an extension point for Data Transfer, the File Manager can support different Data Transfer protocols, both local and remote.
+ +Advanced support for File Repository layouts. The Versioner extension point allows for different File Repository layouts based on Product Types. + +Support for multiple Product structures. The File Manager Client allows for Products to be Flat, or Hierarchical-based. Flat products are collections of singular files that are aggregated together to make a Product. Hierarchical Products are Products that contain collections of directories, and sub-directories, and files. + +Design for scalability. The File Manager uses the popular client-server paradigm, allowing new File Manager servers to be instantiated, as needed, without affecting the File Manager clients, and vice-versa. + +Standard communication protocols. The File Manager uses XML-RPC as its main external interface between the File Manager client and server. XML-RPC, the little brother of SOAP, is fast, extensible, and uses the underlying HTTP protocol for data transfer. + +RSS-based Product syndication. The File Manager web interface allows for the RSS-based syndication of Product feeds based on Product Type. + +Data transfer status tracking. The File Manager tracks all current Product and File transfers and even publishes an RSS-feed of existing transfers. + +This capability set is not exhaustive, and is meant to give the user a feel for what general features are provided by the File Manager. Most likely the user will find that the File Manager provides many other capabilities besides those described here. + +Extension Points +We have constructed the File Manager making use of the factory method pattern to provide multiple extension points for the File Manager. An extension point is an interface within the File Manager that can have many implementations. This is particularly useful when it comes to software component configuration because it allows different implementations of an existing interface to be selected at deployment time. + +The factory method pattern is a creational pattern common to object oriented design. Each File Manager extension point involves the implementation of two interfaces: an extension factory and an extension implementation. At run-time, the File Manager loads a properties file that specifies a factory class to use during extension point instantiation. For example, the File Manager may communicate with a database-based Catalog and an XML-based Element Store (called a Validation Layer), or it may use a Lucene-based Catalog and a database-based Validation Layer. +Using extension points, it is fairly simple to support many different types of what are typically referred to as "plug-in architectures." Each of the core extension points for the File Manager is described below: + +Catalog The Catalog extension point is responsible for storing all the instance data for Products, Metadata, and for file References. Additionally, the Catalog provides a query capability for Products. +Data Transfer The Data Transfer extension point allows for the movement of a Product to and from the archive managed by the File Manager component. Different protocols for Data Transfer may include local (disk-based) copy, or remote XML-RPC based transfer across networked machines. +Repository Manager The Repository Manager extension point provides a means for managing all of the policy information (i.e., the Product Types and their associated information) for Products managed by the File Manager. +Validation Layer The Validation Layer extension point allows for the querying of element definitions associated with a particular Product Type.
The extension point also maps Product Type to Elements. +Versioning The Versioning extension point allows for the definition of different URI generation schemes that define the final resting location of files for a particular Product. +System The extension point that provides the external interface to the File Manager services. This includes the File Manager server interface, as well as the associated File Manager client interface, that communicates with the server. +Current Extension Point Implementations +There are at least two implementations of all of the aforementioned extension points for the File Manager. Each extension point implementation is detailed in this section. + +Catalog +Data Source based Catalog. An implementation of the Catalog extension point interface that uses a JDBC accessible database backend. +Lucene based Catalog. An implementation of the Catalog extension point interface that uses the Lucene free text index system to store Product instance information. +Data Transfer +Local Data Transfer. An implementation of the Data Transfer interface that uses Apache's commons-io to perform local, disk based filesystem data transfer. This implementation also supports locally accessible Network File System (NFS) disks. +Remote Data Transfer. An implementation of the Data Transfer interface that uses the XML-RPC File Manager client to transfer files to a remote XML-RPC File Manager server. +InPlace Data Transfer. An implementation of the Data Transfer interface that avoids transferring any products -- this can be used in the situation where metadata about a particular product should be recorded, but no physical transfer needs to occur. +Repository Manager +Data Source based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in a JDBC accessible database. +XML based Repository Manager. An implementation of the Repository Manager extension point that stores Product Type policy information in an XML file called product-types.xml +Validation Layer +Data Source based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in a JDBC accessible database. +XML based Validation Layer. An implementation of the Validation Layer extension point that stores Element policy information in two XML files called elements.xml and product-type-element-map.xml +System (File Manager client and File Manager server) +XML-RPC based File Manager server. An implementation of the external server interface for the File Manager that uses XML-RPC as the transportation medium. +XML-RPC based File Manager client. An implementation of the client interface for the XML-RPC File Manager server that uses XML-RPC as the transportation medium. +Use Cases +The File Manager was built to support several of the capabilities outlined in Section 3 above. In particular there were several use cases that we wanted to support, some of which are described below. + +File Manager Ingest Use Case +The red numbers in the above Figure correspond to a sequence of steps, and a series of interactions between the different File Manager extension points, that occur in order to perform the file ingestion activity. In Step 1, a File Manager client is invoked for the ingest operation, which sends Metadata and References for a particular Product to ingest to the File Manager server’s System Interface extension point.
The System Interface uses the information about Product Type policy made available by the Repository Manager in order to understand whether or not the product should be transferred, where its root repository path should be, and so on. The System Interface then catalogs the file References and Metadata using the Catalog extension point. During this catalog process, the Catalog extension point uses the Validation Layer to determine which Elements should be extracted for the particular Product, based upon its Product Type. After that, Data Transfer is initiated either at the client or server end, and the first step of Data Transfer is using the Product’s associated Versioner to generate final file References. After final file References have been determined, the file data is transferred by the server or by the client, using the Data Transfer extension point. + +Conclusion +The aim of this document is to provide information relevant to developers about the CAS File Manager. Specifically, this document has described the File Manager's architecture, including its constituent components, object model and key capabilities. Additionally, this document provides an overview of the current implementations of the File Manager's extension points. \ No newline at end of file diff --git a/src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation-relation.txt b/src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation-relation.txt similarity index 100% rename from src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation-relation.txt rename to src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation-relation.txt diff --git a/src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation-simEnts.txt b/src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation-simEnts.txt similarity index 100% rename from src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation-simEnts.txt rename to src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation-simEnts.txt diff --git a/src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation.txt b/src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation.txt similarity index 100% rename from src/main/resources/cdtocode/doc/Jetty10 Operations Guide _ The Eclipse Foundation.txt rename to src/main/resources/cdtocode/doc/jetty/Jetty10 Operations Guide _ The Eclipse Foundation.txt diff --git a/src/main/resources/cdtocode/doc/basic-architecture-relation.txt b/src/main/resources/cdtocode/doc/jetty/basic-architecture-relation.txt similarity index 100% rename from src/main/resources/cdtocode/doc/basic-architecture-relation.txt rename to src/main/resources/cdtocode/doc/jetty/basic-architecture-relation.txt diff --git a/src/main/resources/cdtocode/doc/basic-architecture-simEnts.txt b/src/main/resources/cdtocode/doc/jetty/basic-architecture-simEnts.txt similarity index 100% rename from src/main/resources/cdtocode/doc/basic-architecture-simEnts.txt rename to src/main/resources/cdtocode/doc/jetty/basic-architecture-simEnts.txt diff --git a/src/main/resources/cdtocode/doc/basic-architecture.adoc b/src/main/resources/cdtocode/doc/jetty/basic-architecture.adoc similarity index 100% rename from src/main/resources/cdtocode/doc/basic-architecture.adoc rename to src/main/resources/cdtocode/doc/jetty/basic-architecture.adoc