fix: gb64.com scraper working

This commit is contained in:
lantzelot-swe 2021-01-15 23:15:38 +01:00
parent 46f97d96be
commit e21af5691e
7 changed files with 211 additions and 101 deletions

View File

@ -1,26 +1,4 @@
-More sites for the scraper.
-screenshots cropping and resizing. -1.0.3
-Always save a screenshot with 320x200 resolution. Now it can have the original size, which doesn't seem to work with the carousel? -1.0.3
-Add button for running VICE with the system settings selected, to be able to create csf files more easily. -1.0.3
-More sites for the scraper. (c64.com and gb64.com done)
-Allow for saving without cover or one missing screenshot. Use "missing" for it instead.
-Add support for languages for the description field
-Bug: prg files for vic-20?
-Bug: editing fields:
"When editing a game and changing the Game Title, Year, Author, Composer or Description, if you want to edit a word or number it only lets you change one character before the cursor (|) jumps to the end of the line or the end of the description.
Like the Year ..
1999
so put cursor here.. (|) cursor
199|9
to change it to 1989 when you press backspace to delete 9 then it says..
199
and then the cursor end up after the number..
199|
so if you press 8 to make it say 1989 it then says..
1998|
the cursor should be here..
19|9
The cursor should stay where you put it, not jump to the end of the line."
-Bug: renaming with capital letters: -1.0.3
"You can't edit the Game Title if the game is in the PCUGM already..? I wanted to change the games name "Great court" cause "court" is meant to be "Court" not "court", it needs a capital C, its saying name it a different name, I know it sounds a bit silly but I thought I report it anyway.. :)"

View File

@ -180,13 +180,13 @@ public class C64comOptionsPanel extends JPanel
gbc_composerCheckBox.anchor = GridBagConstraints.WEST;
gbc_composerCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_composerCheckBox.gridx = 1;
gbc_composerCheckBox.gridy = 2;
gbc_composerCheckBox.gridy = 1;
fieldsPanel.add(getComposerCheckBox(), gbc_composerCheckBox);
GridBagConstraints gbc_coverCheckBox = new GridBagConstraints();
gbc_coverCheckBox.anchor = GridBagConstraints.WEST;
gbc_coverCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_coverCheckBox.gridx = 1;
gbc_coverCheckBox.gridy = 3;
gbc_coverCheckBox.gridy = 2;
fieldsPanel.add(getCoverCheckBox(), gbc_coverCheckBox);
GridBagConstraints gbc_yearCheckBox = new GridBagConstraints();
gbc_yearCheckBox.insets = new Insets(0, 0, 5, 5);
@ -195,10 +195,10 @@ public class C64comOptionsPanel extends JPanel
gbc_yearCheckBox.gridy = 3;
fieldsPanel.add(getYearCheckBox(), gbc_yearCheckBox);
GridBagConstraints gbc_gameCheckBox = new GridBagConstraints();
gbc_gameCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_gameCheckBox.insets = new Insets(0, 0, 0, 5);
gbc_gameCheckBox.anchor = GridBagConstraints.WEST;
gbc_gameCheckBox.gridx = 1;
gbc_gameCheckBox.gridy = 1;
gbc_gameCheckBox.gridy = 4;
fieldsPanel.add(getGameCheckBox(), gbc_gameCheckBox);
GridBagConstraints gbc_genreCheckBox = new GridBagConstraints();
gbc_genreCheckBox.insets = new Insets(0, 0, 0, 5);
@ -207,10 +207,10 @@ public class C64comOptionsPanel extends JPanel
gbc_genreCheckBox.gridy = 4;
fieldsPanel.add(getGenreCheckBox(), gbc_genreCheckBox);
GridBagConstraints gbc_screensCheckBox = new GridBagConstraints();
gbc_screensCheckBox.insets = new Insets(0, 0, 0, 5);
gbc_screensCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_screensCheckBox.anchor = GridBagConstraints.WEST;
gbc_screensCheckBox.gridx = 1;
gbc_screensCheckBox.gridy = 4;
gbc_screensCheckBox.gridy = 3;
fieldsPanel.add(getScreensCheckBox(), gbc_screensCheckBox);
}
return fieldsPanel;

View File

@ -39,7 +39,6 @@ public class Gb64comOptionsPanel extends JPanel
private JCheckBox authorCheckBox;
private JCheckBox yearCheckBox;
private JCheckBox gameCheckBox;
private JCheckBox coverCheckBox;
private JCheckBox screensCheckBox;
private ScraperManager scraper;
private JButton connectButton;
@ -180,14 +179,8 @@ public class Gb64comOptionsPanel extends JPanel
gbc_composerCheckBox.anchor = GridBagConstraints.WEST;
gbc_composerCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_composerCheckBox.gridx = 1;
gbc_composerCheckBox.gridy = 2;
gbc_composerCheckBox.gridy = 1;
fieldsPanel.add(getComposerCheckBox(), gbc_composerCheckBox);
GridBagConstraints gbc_coverCheckBox = new GridBagConstraints();
gbc_coverCheckBox.anchor = GridBagConstraints.WEST;
gbc_coverCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_coverCheckBox.gridx = 1;
gbc_coverCheckBox.gridy = 3;
fieldsPanel.add(getCoverCheckBox(), gbc_coverCheckBox);
GridBagConstraints gbc_yearCheckBox = new GridBagConstraints();
gbc_yearCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_yearCheckBox.anchor = GridBagConstraints.WEST;
@ -198,7 +191,7 @@ public class Gb64comOptionsPanel extends JPanel
gbc_gameCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_gameCheckBox.anchor = GridBagConstraints.WEST;
gbc_gameCheckBox.gridx = 1;
gbc_gameCheckBox.gridy = 1;
gbc_gameCheckBox.gridy = 3;
fieldsPanel.add(getGameCheckBox(), gbc_gameCheckBox);
GridBagConstraints gbc_genreCheckBox = new GridBagConstraints();
gbc_genreCheckBox.insets = new Insets(0, 0, 0, 5);
@ -207,10 +200,10 @@ public class Gb64comOptionsPanel extends JPanel
gbc_genreCheckBox.gridy = 4;
fieldsPanel.add(getGenreCheckBox(), gbc_genreCheckBox);
GridBagConstraints gbc_screensCheckBox = new GridBagConstraints();
gbc_screensCheckBox.insets = new Insets(0, 0, 0, 5);
gbc_screensCheckBox.insets = new Insets(0, 0, 5, 5);
gbc_screensCheckBox.anchor = GridBagConstraints.WEST;
gbc_screensCheckBox.gridx = 1;
gbc_screensCheckBox.gridy = 4;
gbc_screensCheckBox.gridy = 2;
fieldsPanel.add(getScreensCheckBox(), gbc_screensCheckBox);
}
return fieldsPanel;
@ -269,17 +262,6 @@ public class Gb64comOptionsPanel extends JPanel
return gameCheckBox;
}
/**
 * Returns the "Cover" check box, creating it on first access.
 *
 * @return the check box, initially selected and disabled
 */
private JCheckBox getCoverCheckBox()
{
  if (coverCheckBox != null)
  {
    return coverCheckBox;
  }
  //First access: build the check box, selected and disabled to start with
  JCheckBox checkBox = new JCheckBox("Cover");
  checkBox.setSelected(true);
  checkBox.setEnabled(false);
  coverCheckBox = checkBox;
  return coverCheckBox;
}
private JCheckBox getScreensCheckBox()
{
if (screensCheckBox == null)
@ -297,7 +279,6 @@ public class Gb64comOptionsPanel extends JPanel
authorCheckBox.setEnabled(enable);
yearCheckBox.setEnabled(enable);
gameCheckBox.setEnabled(enable);
coverCheckBox.setEnabled(enable);
screensCheckBox.setEnabled(enable);
genreCheckBox.setEnabled(enable);
composerCheckBox.setEnabled(enable);
@ -312,7 +293,8 @@ public class Gb64comOptionsPanel extends JPanel
returnValue.setGenre(genreCheckBox.isSelected());
//No description available
returnValue.setDescription(false);
returnValue.setCover(coverCheckBox.isSelected());
//No cover available
returnValue.setCover(false);
returnValue.setScreenshots(screensCheckBox.isSelected());
returnValue.setComposer(composerCheckBox.isSelected());
returnValue.setGame(gameCheckBox.isSelected());

View File

@ -1,6 +1,7 @@
package se.lantz.manager;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
@ -110,7 +111,11 @@ public class ScraperManager
if (fields.isGame())
{
infoModel.setGamesPath(usedScraper.getGameFile());
File scrapedFile = usedScraper.getGameFile();
if (scrapedFile != null)
{
infoModel.setGamesPath(scrapedFile);
}
}
//Set system based on the scraped URL
if (usedScraper.isC64())

View File

@ -155,7 +155,7 @@ public class C64comScraper implements Scraper
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape info");
ExceptionHandler.handleException(e, "Could not scrape information");
}
}

View File

@ -1,43 +1,67 @@
package se.lantz.scraper;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.ImageIO;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import se.lantz.model.data.ScraperFields;
import se.lantz.util.ExceptionHandler;
import se.lantz.util.FileManager;
public class GamebaseScraper implements Scraper
{
private static final Logger logger = LoggerFactory.getLogger(GamebaseScraper.class);
Map<String, String> genreMap = new HashMap<>();
private String gamebaseGameUrl;
private List<String> scrapedMusicList = new ArrayList<>();
private String gamebaseGameUrl;
private String titleCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(0) > td > table > tbody > tr > td:eq(0) > font > b";
private String authorCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(0) > table:eq(1) > tbody > tr > td > table > tbody > tr:eq(0) > td > font:eq(2) > a:eq(1) > b";
private String yearCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(0) > table:eq(1) > tbody > tr > td > table > tbody > tr:eq(0) > td > font:eq(2) > a:eq(0) > b";
private String genreCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(0) > table:eq(1) > tbody > tr > td > table > tbody > tr:eq(5) > td > font:eq(2) > a > b";
private String composerCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(0) > table:eq(1) > tbody > tr > td > table > tbody > tr:eq(1) > td > font:eq(2) > a > b";
private String screensCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(2) > div > table > tbody > tr:eq(1) > td > div > table > tbody > tr:eq(1) > td:eq(1) > img";
private String gameCssQuery =
"body > table:eq(3) > tbody > tr > td.back > table > tbody > tr > td > table > tbody > tr:eq(1) > td > table > tbody > tr:eq(1) > td:eq(1) > table > tbody > tr > td:eq(2) > div > table > tbody > tr:eq(3) > td > font > table > tbody > tr:eq(1) > td > b > a";
private String scrapedTitle = "";
private String scrapedAuthor = "";
private int scrapedYear = 1985;
private String scrapedDescription = "";
private String scrapedGenre = "";
private String scrapedGenre = "";
private String scrapedComposer = "";
private BufferedImage scrapedCover = null;
private File scrapedFile;
public GamebaseScraper()
{
//Keys are Genres defined on gamebase64.com, values are supported genres in the tool
//TODO
genreMap.put("Action / Adventure / Miscellaneous / Text adventure", "adventure");
genreMap.put("Arcade / Adventure / Miscellaneous", "adventure");
genreMap.put("Racing / Driving", "driving");
genreMap.put("Strategy / Board game / Puzzle", "puzzle");
genreMap.put("Strategy / Brain / Puzzle", "puzzle");
genreMap.put("Educational", "programming");
genreMap.put("Simulation / Simulator", "simulation");
genreMap.put("Simulation / Gambling / Cards / Board Game", "simulation");
genreMap.put("Sports", "sport");
genreMap.put("Maze / Breakout", "maze");
genreMap.put("Platform", "platform");
@ -52,23 +76,104 @@ public class GamebaseScraper implements Scraper
this.gamebaseGameUrl = url;
resetFields();
}
/**
 * Puts all scraped values back to their defaults so that values from a previously
 * scraped URL do not leak into the next one.
 */
private void resetFields()
{
  scrapedTitle = "";
  scrapedYear = 1985;
  scrapedAuthor = "";
  scrapedComposer = "";
  scrapedDescription = "";
  scrapedCover = null;
  scrapedGenre = "";
  //scrapeGame() only assigns the file when a download link is found, so a file fetched for an
  //earlier URL must be dropped here or it would stay around for a page without a game link
  scrapedFile = null;
}
/**
 * Downloads the page at the current gamebase URL and stores the values for every field that is
 * enabled in {@code fields}. A field whose element cannot be found on the page keeps its
 * current value. A failure to download or parse the page is passed to the
 * {@link ExceptionHandler}.
 *
 * @param fields selects which pieces of information to scrape
 */
@Override
public void scrapeInformation(ScraperFields fields)
{
  try
  {
    Document doc = Jsoup.connect(gamebaseGameUrl).method(Connection.Method.GET).execute().parse();
    if (fields.isTitle())
    {
      String title = selectFirstText(doc, titleCssQuery);
      if (title != null)
      {
        scrapedTitle = title;
      }
    }
    if (fields.isAuthor())
    {
      String author = selectFirstText(doc, authorCssQuery);
      if (author != null)
      {
        scrapedAuthor = author;
      }
    }
    if (fields.isYear())
    {
      String year = selectFirstText(doc, yearCssQuery);
      if (year != null)
      {
        try
        {
          scrapedYear = Integer.parseInt(year.trim());
        }
        catch (NumberFormatException e)
        {
          //Keep the current year, but leave a trace of what could not be parsed
          logger.error("Could not scrape year for {}", scrapedTitle, e);
        }
      }
    }
    if (fields.isGenre())
    {
      String genre = selectFirstText(doc, genreCssQuery);
      if (genre != null)
      {
        scrapedGenre = mapGenre(genre);
      }
    }
    if (fields.isComposer())
    {
      String composer = selectFirstText(doc, composerCssQuery);
      if (composer != null)
      {
        scrapedComposer = composer;
      }
    }
    if (fields.isGame())
    {
      scrapeGame(doc);
    }
  }
  catch (IOException e)
  {
    ExceptionHandler.handleException(e, "Could not scrape information");
  }
}

/**
 * Returns the text of the first element matching {@code cssQuery}.
 *
 * @param doc the parsed page
 * @param cssQuery the CSS query to evaluate
 * @return the element text, or {@code null} when nothing matches
 */
private String selectFirstText(Document doc, String cssQuery)
{
  Element first = doc.select(cssQuery).first();
  return first != null ? first.text() : null;
}
/**
 * Maps a genre text from the scraped page to one of the genre values held in {@code genreMap}.
 * Only the parent genre, i.e. the text before the first '-', is used for the lookup.
 *
 * @param genreFromGb64com the genre text as scraped, optionally followed by "- subgenre"
 * @return the value of the first map entry whose key contains the parent genre, or an empty
 *         string when the parent genre is blank or no key matches
 */
private String mapGenre(String genreFromGb64com)
{
  //Strip subgenre. indexOf is used instead of split("-"): splitting a text made up only of '-'
  //gives an empty array, so reading element 0 would throw
  int separatorIndex = genreFromGb64com.indexOf('-');
  String parentGenre =
    (separatorIndex >= 0 ? genreFromGb64com.substring(0, separatorIndex) : genreFromGb64com).trim();
  if (parentGenre.isEmpty())
  {
    //Every key contains the empty string, so without this guard an arbitrary genre would be returned
    return "";
  }
  //Map towards available genres, return first one found
  for (Map.Entry<String, String> entry : genreMap.entrySet())
  {
    if (entry.getKey().contains(parentGenre))
    {
      return entry.getValue();
    }
  }
  return "";
}
@Override
@ -101,19 +206,19 @@ public class GamebaseScraper implements Scraper
{
return scrapedGenre;
}
/**
 * @return the composer text stored by the last scrape, an empty string if none was stored
 */
@Override
public String getComposer()
{
  //scrapeInformation() stores the composer in scrapedComposer, so that is the value to hand out
  return scrapedComposer;
}
/**
 * @return always {@code null}, this scraper does not fetch any cover image
 */
@Override
public BufferedImage getCover()
{
  return null;
}
@Override
public File getGameFile()
{
@ -130,8 +235,62 @@ public class GamebaseScraper implements Scraper
/**
 * Downloads up to six screenshots for the current gamebase URL. The first image is the one
 * referenced by the page; the following ones are looked up by appending "_1" to "_5" to the
 * file name of the first image (in front of the file extension).
 * Fetching stops at the first image that cannot be downloaded.
 *
 * @return the screenshots that could be read, possibly empty, never containing {@code null}
 */
@Override
public List<BufferedImage> scrapeScreenshots()
{
  List<BufferedImage> screensList = new ArrayList<>();
  try
  {
    Document doc = Jsoup.connect(gamebaseGameUrl).method(Connection.Method.GET).execute().parse();
    //Fetch the right element
    Element firstScreen = doc.select(screensCssQuery).first();
    if (firstScreen != null)
    {
      String firstUrl = firstScreen.absUrl("src");
      //Split the url in front of the file extension so that the number can be inserted there,
      //whatever the extension is. A dot before the last '/' is not a file extension.
      int lastDot = firstUrl.lastIndexOf('.');
      boolean hasExtension = lastDot > firstUrl.lastIndexOf('/');
      String urlBase = hasExtension ? firstUrl.substring(0, lastDot) : firstUrl;
      String urlExtension = hasExtension ? firstUrl.substring(lastDot) : "";
      //Try to fetch 6 screenshots: "name.png", "name_1.png", "name_2.png" etc
      for (int i = 0; i < 6; i++)
      {
        String numberSuffix = i == 0 ? "" : "_" + i;
        BufferedImage screen = ImageIO.read(new URL(urlBase + numberSuffix + urlExtension));
        //ImageIO.read returns null when the content is not a readable image
        if (screen != null)
        {
          screensList.add(screen);
        }
      }
    }
  }
  catch (IOException e)
  {
    logger.warn("Could not scrape all six screenshots");
  }
  return screensList;
}
/**
 * Downloads the game file linked from the page and keeps it as a temporary file in
 * {@code scrapedFile}. Nothing happens when the page has no download link; a failing
 * download is logged and otherwise ignored.
 *
 * @param doc the parsed game page
 */
private void scrapeGame(Document doc)
{
  Element gameElement = doc.select(gameCssQuery).first();
  if (gameElement == null)
  {
    return;
  }
  try
  {
    URL url = new URL(gameElement.attr("abs:href"));
    //try-with-resources releases the download stream even when creating the temp file fails
    try (BufferedInputStream inputStream = new BufferedInputStream(url.openConnection().getInputStream()))
    {
      //create a temp file and fetch the content
      scrapedFile = FileManager.createTempFileForScraper(inputStream);
    }
    logger.debug("File to include as game: {}", scrapedFile != null ? scrapedFile.getAbsolutePath() : null);
  }
  catch (Exception e)
  {
    logger.error("Could not scrape game file for {}", scrapedTitle, e);
  }
}
}

View File

@ -31,35 +31,21 @@ public class MobyGamesScraper implements Scraper
private static final Logger logger = LoggerFactory.getLogger(MobyGamesScraper.class);
private String mobyGamesGameUrl = "";
private String descriptionCssQuery = "#main > div > div:eq(2) > div";
private String titleCssQuery = ".niceHeaderTitle > a";
private String authorCssQuery = "#coreGameRelease > div:contains(Published)";
private String yearCssQuery = "#coreGameRelease > div:contains(Released)";
private String genreCssQuery = "#coreGameGenre > div > div:contains(Genre)";
private String coverCssQuery = "#coreGameCover > a > img";
private String screensCssQuery = ".thumbnail-image-wrapper > a";
Map<String, String> genreMap = new HashMap<>();
private String scrapedTitle = "";
private String scrapedAuthor = "";
private int scrapedYear = 1985;
private String scrapedDescription = "";
private String scrapedGenre = "";
private String scrapedComposer = "";
private BufferedImage scrapedCover = null;
public MobyGamesScraper()
@ -81,7 +67,7 @@ public class MobyGamesScraper implements Scraper
this.mobyGamesGameUrl = url;
resetFields();
}
private void resetFields()
{
scrapedTitle = "";
@ -149,11 +135,11 @@ public class MobyGamesScraper implements Scraper
{
scrapedCover = scrapeCover(doc);
}
}
catch (IOException e)
{
ExceptionHandler.handleException(e, "Could not scrape title");
ExceptionHandler.handleException(e, "Could not scrape information");
}
}
@ -186,7 +172,7 @@ public class MobyGamesScraper implements Scraper
{
return scrapedGenre;
}
@Override
public String getComposer()
{
@ -252,7 +238,7 @@ public class MobyGamesScraper implements Scraper
}
return value;
}
public String scrapeComposer(Document doc)
{
String value = "";
@ -267,7 +253,7 @@ public class MobyGamesScraper implements Scraper
{
if (node instanceof TextNode)
{
String test = ((TextNode)node).text();
String test = ((TextNode) node).text();
if (test.contains("Music") || test.contains("music"))
{
musicFound = true;
@ -275,7 +261,7 @@ public class MobyGamesScraper implements Scraper
}
else if (node instanceof Element && musicFound)
{
value = ((Element)node).text();
value = ((Element) node).text();
if (!value.isEmpty())
{
break;
@ -379,7 +365,7 @@ public class MobyGamesScraper implements Scraper
}
return null;
}
@Override
public boolean isC64()
{