Pages: [1]
  Print  
Author Topic: Write WordList to a text file (txt) [SOLVED]  (Read 357 times)
jose
Newbie
*
Posts: 16


« on: May 17, 2012, 05:34:03 PM »

Hi,
I have a process that reads an Excel file and then processes it using the "Process Documents from Data" operator.
I want to write the output of this operator (the word list) to a text file (txt), but I do not know which operator to use to accomplish this.

The XML of the process is:

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Text-mining process definition.
  Flow (as wired by the connect elements below):
    Read Excel, then Process Documents from Data, then the result ports.
  The example set goes to "result 1" and the word list to "result 2".
  No operator in this file writes either output to disk.
-->
<process version="5.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
    <process expanded="true" height="370" width="614">
      <!-- Step 1: load cell range A1:CE522 from the spreadsheet; the first row is NOT used for attribute names. -->
      <operator activated="true" class="read_excel" compatibility="5.2.003" expanded="true" height="60" name="Read Excel" width="90" x="45" y="120">
        <parameter key="excel_file" value="/home/pepe/Escritorio/documentos de prueba/Septiembre.xls"/>
        <parameter key="imported_cell_range" value="A1:CE522"/>
        <parameter key="first_row_as_names" value="false"/>
        <!-- Presumably marks row 0 as the row holding the attribute names; confirm against the operator documentation. -->
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <!-- Meta data for column 0: the value is a label followed by ".true.text.attribute"
             (presumably selected flag, value type, role; confirm).
             NOTE(review): the label contains U+FFFD replacement characters where accented Spanish
             characters are expected; this looks like encoding damage. Verify against the original file. -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="�Qu� es lo que m�s te ha gustado de Vueling y quieres que sigamos haciendo, o por el contrario, lo que menos te ha gustado y nos sugieres cambiar? .true.text.attribute"/>
        </list>
      </operator>
      <!-- Step 2: build a "Term Occurrences" vector from the text data.
           Pruning is absolute with bounds 2 and 9999 (presumably terms outside that range are dropped; confirm). -->
      <operator activated="true" class="text:process_document_from_data" compatibility="5.2.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="246" y="75">
        <parameter key="vector_creation" value="Term Occurrences"/>
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="2"/>
        <parameter key="prune_above_absolute" value="9999"/>
        <list key="specify_weights"/>
        <!-- Per-document pipeline, in the order fixed by the connect elements at the end of this subprocess:
             Tokenize, Transform Cases, Filter Stopwords (Dictionary), Filter Tokens (by Length),
             Replace Tokens, Generate n-Grams (Terms). -->
        <process expanded="true" height="505" width="636">
          <operator activated="true" class="text:tokenize" compatibility="5.2.001" expanded="true" height="60" name="Tokenize" width="90" x="103" y="58"/>
          <operator activated="true" class="text:transform_cases" compatibility="5.2.001" expanded="true" height="60" name="Transform Cases" width="90" x="246" y="75"/>
          <!-- Stop words are read from an external dictionary file. -->
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.2.001" expanded="true" height="60" name="Filter Stopwords (Dictionary)" width="90" x="45" y="165">
            <parameter key="file" value="/home/pepe/Escritorio/documentos de prueba/stop words.txt"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="5.2.001" expanded="true" height="60" name="Filter Tokens (by Length)" width="90" x="112" y="255"/>
          <!-- Hand-written normalisation: maps plural forms to singular and unifies the spelling of "vueling".
               NOTE(review): this operator runs after Transform Cases. If that step lower-cases tokens
               (its mode is not set explicitly here), the keys "Vueling", "MALETAS" and "VUELOS"
               can never match. Confirm and remove them if so. -->
          <operator activated="true" class="text:replace_tokens" compatibility="5.2.001" expanded="true" height="60" name="Replace Tokens" width="90" x="246" y="255">
            <list key="replace_dictionary">
              <parameter key="horas" value="hora"/>
              <parameter key="maletas" value="maleta"/>
              <parameter key="vuelos" value="vuelo"/>
              <parameter key="precios" value="precio"/>
              <parameter key="asientos" value="asiento"/>
              <parameter key="Vueling" value="vueling"/>
              <parameter key="MALETAS" value="maleta"/>
              <parameter key="VUELOS" value="vuelo"/>
            </list>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.2.001" expanded="true" height="60" name="Generate n-Grams (Terms)" width="90" x="380" y="255"/>
          <!-- Wiring of the per-document pipeline; the final document is handed back on "document 1". -->
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
          <connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Replace Tokens" to_port="document"/>
          <connect from_op="Replace Tokens" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: the Excel output feeds the document processor; its example set and word list
           are delivered to result ports 1 and 2. The word list is only shown as a result here, so a
           writer would have to be inserted before "result 2" to persist it to a file. -->
      <connect from_op="Read Excel" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_port="result 1"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

« Last Edit: May 18, 2012, 02:33:35 PM by jose » Logged
Marius
Global Moderator
Hero Member
*****
Posts: 1283



WWW
« Reply #1 on: May 18, 2012, 10:56:03 AM »

Hi Jose,

you can use the WordList to Data operator to convert the Wordlist to an example set and then write it to disk with e.g. Write CSV.

All the best,
Marius
Logged

Please add [SOLVED] to the topic title when your problem has been solved! (do so by editing the first post in the thread and modifying the title)
Please click here before posting.
jose
Newbie
*
Posts: 16


« Reply #2 on: May 18, 2012, 02:32:54 PM »

Thanks Marius.

 You are a genius.
Logged
Pages: [1]
  Print  
 
Jump to: