Pages: [1]
  Print  
Author Topic: Log operator not working in batch mode  (Read 735 times)
stegmannt
Newbie
*
Posts: 4


« on: August 31, 2014, 02:04:14 PM »

Hi,

I'm trying to measure the execution time of selected operators. I'm using the Log operator for this, which works fine in GUI mode.

But in batch mode the the log file gets created but contains only zeros. Im using latest CE Version 5.3.

This is the process I'm using:

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
  <context>
    <input>
      <location>reviews</location>
    </input>
    <output>
      <location>performance</location>
      <location>labeledreviews</location>
      <location>log2</location>
    </output>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">
    <parameter key="logverbosity" value="init"/>
    <parameter key="random_seed" value="2001"/>
    <parameter key="send_mail" value="never"/>
    <parameter key="notification_email" value=""/>
    <parameter key="process_duration_for_mail" value="30"/>
    <parameter key="encoding" value="SYSTEM"/>
    <process expanded="true">
      <operator activated="true" class="log" compatibility="5.3.015" expanded="true" height="76" name="Log" width="90" x="179" y="300">
        <parameter key="filename" value="/home.local/hadoop/rapidresults/250mlog1.log"/>
        <list key="log">
          <parameter key="tfidf" value="operator.Process Documents from Data.value.execution-time"/>
          <parameter key="split" value="operator.Split Data.value.execution-time"/>
          <parameter key="apply" value="operator.Apply Model.value.execution-time"/>
          <parameter key="naive" value="operator.Naive Bayes.value.execution-time"/>
          <parameter key="tokenize" value="operator.Tokenize.value.execution-time"/>
          <parameter key="ngrams" value="operator.Generate n-Grams (Terms).value.execution-time"/>
        </list>
        <parameter key="sorting_type" value="none"/>
        <parameter key="sorting_k" value="100"/>
        <parameter key="persistent" value="false"/>
      </operator>
      <operator activated="true" class="text:process_document_from_data" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Data" width="90" x="45" y="210">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="5.0"/>
        <parameter key="prune_above_percent" value="95.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_array"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Tokenize" width="90" x="45" y="75">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="text:stem_porter" compatibility="5.3.002" expanded="true" height="60" name="Stem (Porter)" width="90" x="246" y="30"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.002" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="246" y="120"/>
          <operator activated="true" class="text:filter_by_length" compatibility="5.3.002" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="246" y="210">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="25"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.3.002" expanded="true" height="60" name="Generate n-Grams (Terms)" width="90" x="246" y="300">
            <parameter key="max_length" value="2"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="materialize_data" compatibility="5.3.015" expanded="true" height="76" name="Materialize Data" width="90" x="179" y="210">
        <parameter key="datamanagement" value="double_sparse_array"/>
      </operator>
      <operator activated="true" class="split_data" compatibility="5.3.015" expanded="true" height="94" name="Split Data" width="90" x="179" y="75">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="stratified sampling"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="5.3.015" expanded="true" height="76" name="Naive Bayes" width="90" x="313" y="30">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="true" class="apply_model" compatibility="5.3.015" expanded="true" height="76" name="Apply Model" width="90" x="380" y="300">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="performance_classification" compatibility="5.3.015" expanded="true" height="76" name="Performance" width="90" x="313" y="165">
        <parameter key="main_criterion" value="accuracy"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
      <connect from_port="input 1" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Log" from_port="through 1" to_port="result 3"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Materialize Data" to_port="example set input"/>
      <connect from_op="Materialize Data" from_port="example set output" to_op="Split Data" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 1" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
      <connect from_op="Performance" from_port="performance" to_port="result 1"/>
      <connect from_op="Performance" from_port="example set" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

Thanks for any help.

Tobias
Logged
awchisholm
Sr. Member
****
Posts: 404


WWW
« Reply #1 on: August 31, 2014, 06:05:46 PM »

Is the operator execution order correct. If the `Log` operator executes before all the other operators, it will log zeroes.

regards

Andrew
Logged

Pages: [1]
  Print  
 
Jump to: