Pages: [1]
Author Topic: Crawling password protected site  (Read 381 times)
Posts: 6

« on: May 17, 2013, 05:51:09 PM »

Hello everyone,
I know this question has been asked before, but I've looked around and can't seem to find a solution. I'm trying to crawl a distributor's website to get product inventory details, and I can't figure out where to put the site's username and password in the Crawl Web process. I've copied my code below; could someone tell me where the login info should go?

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Process definition containing a single "Crawl Web" operator whose
  "Example Set" output port is connected to the process's "result 1" port.
  The posted snippet was missing the end tags for the Crawl Web operator,
  the inner process, the root operator and the root process; they are
  restored here so the document is well-formed.
-->
<process version="5.2.008">
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="100" width="212">
      <operator activated="true" class="web:crawl_web" compatibility="5.3.000" expanded="true" height="60" name="Crawl Web" width="90" x="112" y="30">
        <!-- The start URL is empty in this copy of the process; it must be filled in before running. -->
        <parameter key="url" value=""/>
        <!-- No crawling rules are defined (empty list). -->
        <list key="crawling_rules"/>
        <parameter key="output_dir" value="/Desktop/"/>
        <parameter key="extension" value="html"/>
        <parameter key="max_pages" value="25"/>
        <parameter key="domain" value="server"/>
        <!-- NOTE(review): units of delay and max_page_size are not stated in this file; confirm against the operator documentation. -->
        <parameter key="delay" value="500"/>
        <parameter key="max_page_size" value="1000"/>
        <parameter key="obey_robot_exclusion" value="false"/>
      </operator>
      <connect from_op="Crawl Web" from_port="Example Set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Pages: [1]
Jump to: