Select screenshots for scraper wip

This commit is contained in:
lantzelot-swe 2020-12-26 00:07:08 +01:00
parent 5c701e08af
commit 38315712b0
6 changed files with 382 additions and 149 deletions

View File

@ -8,6 +8,8 @@ import java.awt.GridBagLayout;
import java.awt.Insets;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.image.BufferedImage;
import java.util.List;
import javax.swing.JButton;
import javax.swing.JPanel;
@ -17,8 +19,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import se.lantz.gui.scraper.ScraperDialog;
import se.lantz.gui.scraper.ScreenshotsSelectionDialog;
import se.lantz.model.MainViewModel;
import se.lantz.model.data.GameListData;
import se.lantz.model.data.ScraperFields;
public class GameDetailsBackgroundPanel extends JPanel
{
@ -220,8 +224,30 @@ public class GameDetailsBackgroundPanel extends JPanel
if (scraperDialog.showDialog())
{
MainWindow.getInstance().setWaitCursor(true);
model.scrapeGameInformation(scraperDialog.getScraperFields());
ScraperFields scraperFields = scraperDialog.getScraperFields();
model.scrapeGameInformation(scraperFields);
MainWindow.getInstance().setWaitCursor(false);
if (scraperFields.isScreenshots())
{
//Scrape the screens and check how many there are.
List<BufferedImage> screenshots = model.scrapeScreenshots();
if (screenshots.size() > 2)
{
//Show dialog for selecting screenshots
ScreenshotsSelectionDialog screenDialog = new ScreenshotsSelectionDialog(MainWindow.getInstance(), screenshots);
screenDialog.pack();
screenDialog.setLocationRelativeTo(MainWindow.getInstance());
if (screenDialog.showDialog())
{
List<BufferedImage> selectedScreenshots = screenDialog.getSelectedScreenshots();
model.setScreenshotImages(selectedScreenshots.get(0), selectedScreenshots.get(1));
}
}
else
{
//TODO
}
}
}
}

View File

@ -0,0 +1,47 @@
package se.lantz.gui.scraper;
import java.awt.LayoutManager;
import javax.swing.JPanel;
import java.awt.GridBagLayout;
import javax.swing.JLabel;
import java.awt.GridBagConstraints;
import javax.swing.JCheckBox;
import java.awt.Insets;
/**
 * Small panel that stacks an image label (row 0) above a check box (row 1)
 * using a {@link GridBagLayout}. Both child components are created lazily by
 * their getters and are exposed so that callers can set the icon and query the
 * selection state.
 */
public class ScreenshotCheckBoxPanel extends JPanel
{
  private JLabel imageLabel;
  private JCheckBox checkBox;

  /**
   * Builds the panel: the image label is placed in the top cell with a 5 pixel
   * gap below it, the check box is anchored to the top of the cell underneath.
   */
  public ScreenshotCheckBoxPanel()
  {
    setLayout(new GridBagLayout());
    add(getImageLabel(), createImageLabelConstraints());
    add(getCheckBox(), createCheckBoxConstraints());
  }

  /**
   * @return constraints for the image label: cell (0,0), takes horizontal
   *         weight only, 5 pixel bottom inset
   */
  private static GridBagConstraints createImageLabelConstraints()
  {
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 0;
    constraints.weightx = 1.0;
    constraints.insets = new Insets(0, 0, 5, 0);
    return constraints;
  }

  /**
   * @return constraints for the check box: cell (0,1), takes both horizontal
   *         and vertical weight, anchored to the top of its cell
   */
  private static GridBagConstraints createCheckBoxConstraints()
  {
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.gridx = 0;
    constraints.gridy = 1;
    constraints.weightx = 1.0;
    constraints.weighty = 1.0;
    constraints.anchor = GridBagConstraints.NORTH;
    return constraints;
  }

  /**
   * @return the label intended to show the image; created with empty text on
   *         first access and reused afterwards
   */
  public JLabel getImageLabel()
  {
    if (imageLabel != null)
    {
      return imageLabel;
    }
    imageLabel = new JLabel("");
    return imageLabel;
  }

  /**
   * @return the check box shown below the image; created with empty text on
   *         first access and reused afterwards
   */
  public JCheckBox getCheckBox()
  {
    if (checkBox != null)
    {
      return checkBox;
    }
    checkBox = new JCheckBox("");
    return checkBox;
  }
}

View File

@ -0,0 +1,42 @@
package se.lantz.gui.scraper;
import java.awt.BorderLayout;
import java.awt.Frame;
import java.awt.image.BufferedImage;
import java.util.List;
import javax.swing.JPanel;
import se.lantz.gui.BaseDialog;
/**
 * Dialog that wraps a {@link ScreenshotsSelectionPanel} so the user can pick
 * among the supplied screenshots. Content handling is inherited from
 * {@code BaseDialog} (its {@code addContent} is used here).
 */
public class ScreenshotsSelectionDialog extends BaseDialog
{
  /** Lazily created panel listing the screenshots with check boxes. */
  private ScreenshotsSelectionPanel selectionPanel;
  /** Screenshots handed to the selection panel when it is first created. */
  private List<BufferedImage> screenshotInfoList;

  /**
   * Creates a non-resizable dialog showing the given screenshots.
   *
   * @param owner the frame passed on to the {@code BaseDialog} constructor
   * @param screenshotInfoList the screenshots to choose from
   */
  public ScreenshotsSelectionDialog(Frame owner, List<BufferedImage> screenshotInfoList)
  {
    super(owner);
    // Must be assigned before the selection panel is created below.
    this.screenshotInfoList = screenshotInfoList;

    JPanel contentPanel = new JPanel(new BorderLayout());
    contentPanel.add(getScreenshotsSelectionPanel(), BorderLayout.CENTER);
    addContent(contentPanel);

    setTitle("Scrape game information");
    setResizable(false);
  }

  /**
   * @return the selection panel, created on first access from the screenshots
   *         given to the constructor
   */
  private ScreenshotsSelectionPanel getScreenshotsSelectionPanel()
  {
    if (selectionPanel != null)
    {
      return selectionPanel;
    }
    selectionPanel = new ScreenshotsSelectionPanel(screenshotInfoList);
    return selectionPanel;
  }

  /**
   * @return the screenshots whose check boxes are currently selected, as
   *         reported by the selection panel
   */
  public List<BufferedImage> getSelectedScreenshots()
  {
    ScreenshotsSelectionPanel panel = getScreenshotsSelectionPanel();
    return panel.getSelectedScreenshots();
  }
}

View File

@ -0,0 +1,82 @@
package se.lantz.gui.scraper;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.GridLayout;
import java.awt.Insets;
import java.awt.image.BufferedImage;
import java.util.ArrayList;
import java.util.List;
import javax.swing.ImageIcon;
import javax.swing.JLabel;
import javax.swing.JPanel;
/**
 * Panel showing an instruction label followed by a grid with one
 * {@link ScreenshotCheckBoxPanel} per screenshot. The check boxes determine
 * which screenshots {@link #getSelectedScreenshots()} returns.
 */
public class ScreenshotsSelectionPanel extends JPanel
{
  /** Screenshots to choose from; index-aligned with {@link #screenshotCheckBoxList}. */
  private List<BufferedImage> screenshots;
  /** One entry per screenshot, in the same order as {@link #screenshots}. */
  private List<ScreenshotCheckBoxPanel> screenshotCheckBoxList = new ArrayList<>();
  private JLabel infoLabel;
  private JPanel screenPanel;

  /**
   * Lays out the info label (row 0) and the screenshot grid (row 1).
   *
   * @param screenshots the screenshots to present; elements may be null (for
   *          example when an image could not be read), in which case the
   *          corresponding tile is shown without an image and cannot be ticked
   */
  public ScreenshotsSelectionPanel(List<BufferedImage> screenshots)
  {
    this.screenshots = screenshots;
    GridBagLayout gridBagLayout = new GridBagLayout();
    setLayout(gridBagLayout);
    GridBagConstraints gbc_infoLabel = new GridBagConstraints();
    gbc_infoLabel.weightx = 1.0;
    gbc_infoLabel.anchor = GridBagConstraints.WEST;
    gbc_infoLabel.insets = new Insets(10, 5, 5, 0);
    gbc_infoLabel.gridx = 0;
    gbc_infoLabel.gridy = 0;
    add(getInfoLabel(), gbc_infoLabel);
    GridBagConstraints gbc_screenPanel = new GridBagConstraints();
    gbc_screenPanel.weighty = 1.0;
    gbc_screenPanel.weightx = 1.0;
    gbc_screenPanel.fill = GridBagConstraints.BOTH;
    gbc_screenPanel.gridx = 0;
    gbc_screenPanel.gridy = 1;
    add(getScreenPanel(), gbc_screenPanel);
  }

  /**
   * @return the instruction label, created on first access
   */
  private JLabel getInfoLabel()
  {
    if (infoLabel == null)
    {
      infoLabel = new JLabel("Select two screenshots below:");
    }
    return infoLabel;
  }

  /**
   * @return the grid panel holding one check box tile per screenshot, created
   *         and populated on first access
   */
  private JPanel getScreenPanel()
  {
    if (screenPanel == null)
    {
      screenPanel = new JPanel();
      // With a non-zero row count GridLayout derives the number of columns
      // from the number of components, so more than four tiles still fit.
      screenPanel.setLayout(new GridLayout(2, 2, 5, 5));
      for (BufferedImage screenshot : screenshots)
      {
        ScreenshotCheckBoxPanel checkBoxPanel = new ScreenshotCheckBoxPanel();
        if (screenshot != null)
        {
          checkBoxPanel.getImageLabel().setIcon(new ImageIcon(screenshot));
        }
        else
        {
          // new ImageIcon((Image) null) throws a NullPointerException. Keep
          // the tile so indexes stay aligned with the screenshot list, but do
          // not let the user pick an image that does not exist.
          checkBoxPanel.getCheckBox().setEnabled(false);
        }
        screenshotCheckBoxList.add(checkBoxPanel);
        screenPanel.add(checkBoxPanel);
      }
    }
    return screenPanel;
  }

  /**
   * Collects the screenshots whose check box is currently selected.
   *
   * @return a new list with the selected screenshots in display order; empty
   *         when nothing is selected. The number of elements is not limited to
   *         two, so callers must check the size before indexing.
   */
  public List<BufferedImage> getSelectedScreenshots()
  {
    List<BufferedImage> returnList = new ArrayList<>();
    for (int i = 0; i < screenshotCheckBoxList.size(); i++)
    {
      if (screenshotCheckBoxList.get(i).getCheckBox().isSelected())
      {
        returnList.add(screenshots.get(i));
      }
    }
    return returnList;
  }
}

View File

@ -7,8 +7,6 @@ import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.DefaultComboBoxModel;
import javax.swing.ListModel;
@ -429,30 +427,27 @@ public class MainViewModel extends AbstractModel
public void scrapeGameInformation(ScraperFields fields)
{
scraper.scrapeInformation(fields);
if (fields.isTitle())
{
infoModel.setTitle(scraper.scrapeTitle());
infoModel.setTitle(scraper.getTitle());
}
if (fields.isAuthor())
{
infoModel.setAuthor(scraper.scrapeAuthor());
infoModel.setAuthor(scraper.getAuthor());
}
if (fields.isYear())
{
Pattern p = Pattern.compile("\\d+");
Matcher m = p.matcher(scraper.scrapeYear());
if (m.find())
{
infoModel.setYear(Integer.parseInt(m.group()));
}
{
infoModel.setYear(scraper.getYear());
}
if (fields.isDescription())
{
infoModel.setDescription(scraper.scrapeDescription());
infoModel.setDescription(scraper.getDescription());
}
if (fields.isGenre())
{
String genre = scraper.scrapeGenre();
String genre = scraper.getGenre();
if (!genre.isEmpty())
{
infoModel.setGenre(genre);
@ -460,20 +455,18 @@ public class MainViewModel extends AbstractModel
}
if (fields.isCover())
{
infoModel.setCoverImage(scraper.scrapeCover());
}
if (fields.isScreenshots())
{ //TODO: Make it possible to select which screenshot to use
List<BufferedImage> images = scraper.scrapeScreenshots();
if (images.size() > 0)
{
infoModel.setScreen1Image(images.get(0));
}
if (images.size() > 1)
{
infoModel.setScreen2Image(images.get(1));
}
infoModel.setCoverImage(scraper.getCover());
}
}
/**
 * Asks the scraper for the screenshots of the current game.
 *
 * @return the list returned by the scraper's {@code scrapeScreenshots()}
 */
public List<BufferedImage> scrapeScreenshots()
{
  List<BufferedImage> scrapedImages = scraper.scrapeScreenshots();
  return scrapedImages;
}
/**
 * Stores the two given images as screenshot 1 and screenshot 2 on the info
 * model.
 *
 * @param screen1 image passed to the info model's {@code setScreen1Image}
 * @param screen2 image passed to the info model's {@code setScreen2Image}
 */
public void setScreenshotImages(BufferedImage screen1, BufferedImage screen2)
{
getInfoModel().setScreen1Image(screen1);
getInfoModel().setScreen2Image(screen2);
}
}

View File

@ -1,18 +1,16 @@
package se.lantz.util;
import java.awt.image.BufferedImage;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import javax.swing.ImageIcon;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
@ -23,6 +21,8 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import se.lantz.model.data.ScraperFields;
public class MobyGamesScraper
{
private static final Logger logger = LoggerFactory.getLogger(MobyGamesScraper.class);
@ -44,23 +44,35 @@ public class MobyGamesScraper
private String genreCssQuery = "#coreGameGenre > div > div:contains(Genre)";
private String coverCssQuery = "#coreGameCover > a > img";
private String screensCssQuery = ".thumbnail-image-wrapper > a";
private long startTime = 0L;
Map<String, String> genreMap = new HashMap<>();
private String scrapedTitle = "";
private String scrapedAuthor = "";
private int scrapedYear = 1985;
private String scrapedDescription = "";
private String scrapedGenre = "";
private BufferedImage scrapedCover = null;
public MobyGamesScraper()
{
// TODO Auto-generated constructor stub
//*[@id="main"]/div/div[2]/h1/a
//*[@id="main"]/div/div[3]/div[1]/h2[1]
//*[@id="main"]/div/div[2]/h1/a
//*[@id="main"]/div/div[3]/div[1]/h2[1]
//*[@id="coreGameCover"]/a/img
//Keys are Genres defined on MobyGames, values are supported genres in the tool
genreMap.put("Adventure, Role-Playing (RPG)", "adventure");
genreMap.put("Racing / driving", "driving");
@ -70,12 +82,6 @@ public class MobyGamesScraper
genreMap.put("Sports", "sport");
}
public static void main(String[] args)
{
MobyGamesScraper scraper = new MobyGamesScraper();
scraper.scrapeMobyGames();
}
public void connectToMobyGames(String url) throws IOException
{
this.mobyGamesGameUrl = "";
@ -83,151 +89,194 @@ public class MobyGamesScraper
this.mobyGamesGameUrl = url;
}
public String scrapeTitle()
public void scrapeInformation(ScraperFields fields)
{
String value = "";
Document doc;
try
{
Connection.Response result = Jsoup.connect(mobyGamesGameUrl).method(Connection.Method.GET).execute();
doc = result.parse();
//Fetch the right element
Elements queryElements = doc.select(titleCssQuery);
Element first = queryElements.first();
if (first != null)
//Fetch title
if (fields.isTitle())
{
value = first.text();
Elements queryElements = doc.select(titleCssQuery);
Element first = queryElements.first();
if (first != null)
{
scrapedTitle = first.text();
}
}
if (fields.isAuthor())
{
scrapedAuthor = scarpeElementValue(doc, authorCssQuery);
}
if (fields.isYear())
{
Pattern p = Pattern.compile("\\d+");
Matcher m = p.matcher(scarpeElementValue(doc, yearCssQuery));
if (m.find())
{
scrapedYear = Integer.parseInt(m.group());
}
}
if (fields.isDescription())
{
scrapedDescription = scrapeDescription(doc);
}
if (fields.isGenre())
{
String genre = scrapeGenre(doc);
if (!genre.isEmpty())
{
scrapedGenre = genre;
}
}
if (fields.isCover())
{
scrapedCover = scrapeCover(doc);
}
//TODO: Screens
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape title");
}
return value;
//
// if (fields.isScreenshots())
// { //TODO: Make it possible to select which screenshot to use
// List<BufferedImage> images = scraper.scrapeScreenshots();
// if (images.size() > 0)
// {
// infoModel.setScreen1Image(images.get(0));
// }
// if (images.size() > 1)
// {
// infoModel.setScreen2Image(images.get(1));
// }
// }
}
public String scrapeAuthor()
public String getTitle()
{
return scarpeElementValue(authorCssQuery);
return scrapedTitle;
}
public String scrapeYear()
public String getAuthor()
{
return scarpeElementValue(yearCssQuery);
}
private void scrapeMobyGames()
{
startTime = System.currentTimeMillis();
logger.debug("Scraping {} ...", mobyGamesBaseUrl);
scrapeDescription();
System.out.println("Author: " + scarpeElementValue(authorCssQuery));
System.out.println("Year: " + scarpeElementValue(yearCssQuery));
System.out.println("Genre: " + scarpeElementValue(genreCssQuery));
scrapeCover();
return scrapedAuthor;
}
public String scrapeDescription()
public int getYear()
{
Document doc;
try
return scrapedYear;
}
/**
 * @return the description currently held by this scraper; the field starts
 *         out as an empty string
 */
public String getDescription()
{
  return this.scrapedDescription;
}
/**
 * @return the genre currently held by this scraper; the field starts out as
 *         an empty string
 */
public String getGenre()
{
  return this.scrapedGenre;
}
/**
 * @return the cover image currently held by this scraper, or {@code null}
 *         when none has been stored (the field's initial value)
 */
public BufferedImage getCover()
{
  return this.scrapedCover;
}
// public String scrapeTitle()
// {
// String value = "";
// Document doc;
// try
// {
// Connection.Response result = Jsoup.connect(mobyGamesGameUrl).method(Connection.Method.GET).execute();
// doc = result.parse();
// //Fetch the right element
// Elements queryElements = doc.select(titleCssQuery);
// Element first = queryElements.first();
// if (first != null)
// {
// value = first.text();
// }
// }
// catch (IOException e)
// {
// ExceptionHandler.handleException(e, "Could not scrape title");
// }
// return value;
// }
public String scrapeDescription(Document doc)
{
//Fetch the right element
Elements descriptionDiv = doc.select(descriptionCssQuery);
if (descriptionDiv.first() != null)
{
Connection.Response result = Jsoup.connect(mobyGamesGameUrl).method(Connection.Method.GET).execute();
doc = result.parse();
//Fetch the right element
Elements descriptionDiv = doc.select(descriptionCssQuery);
if (descriptionDiv.first() != null)
//Get all text elements
List<TextNode> textNodes = descriptionDiv.first().textNodes();
StringBuilder builder = new StringBuilder();
for (TextNode textNode : textNodes)
{
//Get all text elements
List<TextNode> textNodes = descriptionDiv.first().textNodes();
StringBuilder builder = new StringBuilder();
for (TextNode textNode : textNodes)
if (textNode.text().length() > 1)
{
if (textNode.text().length() > 1)
{
builder.append(textNode.text());
}
builder.append(textNode.text());
}
return builder.toString();
}
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape description");
return builder.toString();
}
return "";
}
public String scrapeGenre()
public String scrapeGenre(Document doc)
{
String genreFromMobyGames = scarpeElementValue(genreCssQuery);
String genreFromMobyGames = scarpeElementValue(doc, genreCssQuery);
String[] split = genreFromMobyGames.split(", ");
for (int i = 0; i < split.length; i++)
{
//Map towards available genres, return first one found
for (Map.Entry<String, String> entry : genreMap.entrySet()) {
for (Map.Entry<String, String> entry : genreMap.entrySet())
{
if (entry.getKey().contains(split[i]))
{
System.out.println(entry.getKey() + "/" + entry.getValue());
return entry.getValue();
}
}
}
}
return "";
}
private String scarpeElementValue(String cssQuery)
private String scarpeElementValue(Document doc, String cssQuery)
{
String value = "";
Document doc;
try
//Fetch the right element
Elements queryElements = doc.select(cssQuery);
Element first = queryElements.first();
if (first != null)
{
Connection.Response result = Jsoup.connect(mobyGamesGameUrl).method(Connection.Method.GET).execute();
doc = result.parse();
//Fetch the right element
Elements queryElements = doc.select(cssQuery);
Element first = queryElements.first();
if (first != null)
{
int index = queryElements.first().elementSiblingIndex();
Element valueElement = first.parent().child(index + 1);
value = valueElement.text();
}
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape information (" + cssQuery + ")");
int index = queryElements.first().elementSiblingIndex();
Element valueElement = first.parent().child(index + 1);
value = valueElement.text();
}
return value;
}
public BufferedImage scrapeCover()
public BufferedImage scrapeCover(Document doc)
{
Document doc;
try
//Fetch the right element
Elements coverElements = doc.select(coverCssQuery);
if (coverElements.first() != null)
{
Connection.Response result = Jsoup.connect(mobyGamesGameUrl).method(Connection.Method.GET).execute();
doc = result.parse();
//Fetch the right element
Elements coverElements = doc.select(coverCssQuery);
if (coverElements.first() != null)
{
Element coverElement = coverElements.first();
String bigCoverUrl = coverElement.parent().attr("href");
return scrapeBigCover(bigCoverUrl);
}
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape cover");
Element coverElement = coverElements.first();
String bigCoverUrl = coverElement.parent().attr("href");
return scrapeBigCover(bigCoverUrl);
}
return null;
}
private BufferedImage scrapeBigCover(String url)
{
String cssQuery = "#main > div > div:eq(1) > center > img"; //*[@id="main"]/div/div[2]/center/img
@ -244,7 +293,7 @@ public class MobyGamesScraper
String absoluteUrl = coverElement.absUrl("src");
URL imageUrl = new URL(absoluteUrl);
return ImageIO.read(imageUrl);
return ImageIO.read(imageUrl);
}
}
catch (IOException e)
@ -253,32 +302,26 @@ public class MobyGamesScraper
}
return null;
}
public List<BufferedImage> scrapeScreenshots()
{
List<BufferedImage> returnList = new ArrayList<>();
Document doc;
try
{
Connection.Response result = Jsoup.connect(mobyGamesGameUrl + "/screenshots").method(Connection.Method.GET).execute();
Connection.Response result =
Jsoup.connect(mobyGamesGameUrl + "/screenshots").method(Connection.Method.GET).execute();
doc = result.parse();
//Fetch the right element
Elements coverElements = doc.select(screensCssQuery);
logger.debug("Number of screenshots found: {}", coverElements.size());
//Only scrape first two for now
for (int i = 0; i < Math.min(2, coverElements.size()); i++)
for (Element element : coverElements)
{
String bigScreenUrl = coverElements.get(i).attr("href");
String bigScreenUrl = element.attr("href");
logger.debug("Screen URL = " + bigScreenUrl);
returnList.add(scrapeBigScreenshot(bigScreenUrl));
}
// for (Element element : coverElements)
// {
// String bigScreenUrl = element.attr("href");
// logger.debug("Screen URL = " + bigScreenUrl);
// returnList.add(scrapeBigScreenshot(bigScreenUrl));
// }
}
}
catch (IOException e)
{
@ -303,7 +346,7 @@ public class MobyGamesScraper
String absoluteUrl = coverElement.absUrl("src");
URL imageUrl = new URL(absoluteUrl);
return ImageIO.read(imageUrl);
return ImageIO.read(imageUrl);
}
}
catch (IOException e)