package dmi_demo; import java.net.*; import java.sql.*; import java.util.*; import java.util.regex.*; import java.io.*; public class Initializer { Connection dbconn; public Initializer () throws SQLException, ClassNotFoundException, IOException { dbconn = DmiDB.connect(); dbconn.setAutoCommit(false); } public ArrayList fetchWebPage (String addr) { ArrayList result = new ArrayList (); try { URL url = new URL(addr); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.connect(); InputStream in = conn.getInputStream(); String line; BufferedReader br = new BufferedReader(new InputStreamReader(in)); while((line = br.readLine()) != null) { result.add(line); } conn.disconnect(); } catch (MalformedURLException e) { System.out.println("Bad URL while looking up site "+addr+": "+e.getMessage()); } catch (IOException e) { System.out.println("IO exception while looking up site "+addr+": "+e.getMessage()); } return result; } // This step is a little bit kludgy, relying on pattern matching statements // from the regex library to extract content from the page. With more // time, this could be done properly by representing the page as a DOM tree // and searching for specific elements. public void populateDB () { // look up Fortune 500 companies String addr = "http://www.usatoday.com/money/companies/2004-03-22-fortune-500-list_x.htm"; ArrayList lines = fetchWebPage(addr); String line = ""; String sql = "INSERT INTO DMI_Company (Name) VALUES (?)"; try { PreparedStatement ps = dbconn.prepareStatement(sql); for (Iterator it = lines.iterator(); it.hasNext(); line = it.next()) { //line:

11. Hewlett-Packard Co., Palo Alto, Calif., 14, $73.061

Pattern p = Pattern.compile ("inside\\-copy.*\\d+\\. (.*?)\\,"); Matcher m = p.matcher(line); if (m.find()) { String company = m.group(1); // Some of these string have ampersands in them Pattern p2 = Pattern.compile("\\&\\;"); Matcher m2 = p2.matcher(company); company = m2.replaceAll("\\&"); ps.setString(1, company); ps.execute(); } } ps.close(); dbconn.commit(); } catch (SQLException e) { System.out.println("SQL Exception while trying to populate companies: "+e.getMessage()); } // look up auto parts addr = "http://www.carpartslist.com/partslist/"; try { sql = "INSERT INTO DMI_Part (Description) VALUES (?)"; PreparedStatement ps = dbconn.prepareStatement(sql); for (char letter = 'A'; letter <= 'Z'; letter ++) { lines = fetchWebPage(addr+letter+"/"); line = ""; for (Iterator it = lines.iterator(); it.hasNext(); line = it.next()) { Pattern p = Pattern.compile("class=\\\"content\\\"(.*)class=\\\"tiny\\\""); Matcher m = p.matcher(line); if (m.find()) { String innerline = m.group(1); Pattern p2 = Pattern.compile ("\\(.*?)\\<\\/a\\>"); Matcher m2 = p2.matcher(innerline); while (m2.find()) { String part = m2.group(1); System.out.println("found "+part+" on page "+letter); ps.setString(1, part); ps.execute(); } } } } ps.close(); dbconn.commit(); } catch (SQLException e) { System.out.println("SQL Exception while trying to populate parts: "+e.getMessage()); } } public void setPartPrices () { Random r = new Random(); try { String sql = "SELECT Id FROM DMI_Part"; PreparedStatement queryps = dbconn.prepareStatement(sql); sql = "UPDATE DMI_Part SET Price=? WHERE Id=?"; PreparedStatement updateps = dbconn.prepareStatement(sql); // find all parts to update ResultSet rs = queryps.executeQuery(); while (rs.next()) { int id = rs.getInt("Id"); //random price between $10 and $1000 double price = r.nextFloat()*990. + 10.; System.out.println("Setting price of part "+id+" to "+price); updateps.setDouble(1, price); updateps.setInt(2, id); updateps.execute(); } dbconn.commit(); rs.close(); queryps.close(); updateps.close(); } catch (SQLException e) { System.out.println("SQL Exception while trying to updates prices: "+e.getMessage()); } } }